From 805280685fe656f3c0b163e442e4728048de0b2d Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Wed, 2 Aug 2023 16:26:12 +0200 Subject: [PATCH 01/26] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20nan=20implement?= =?UTF-8?q?ation=20of=20m4=20algorithm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- downsample_rs/src/m4.rs | 113 +++++++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 42 deletions(-) diff --git a/downsample_rs/src/m4.rs b/downsample_rs/src/m4.rs index fa3dd50..21ff63c 100644 --- a/downsample_rs/src/m4.rs +++ b/downsample_rs/src/m4.rs @@ -1,4 +1,4 @@ -use argminmax::ArgMinMax; +use argminmax::{ArgMinMax, NaNArgMinMax}; use num_traits::{AsPrimitive, FromPrimitive}; use rayon::iter::IndexedParallelIterator; use rayon::prelude::*; @@ -12,63 +12,92 @@ use crate::types::Num; // ----------- WITH X -pub fn m4_with_x(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive, - Ty: Copy + PartialOrd, -{ - assert_eq!(n_out % 4, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4); - m4_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax()) +macro_rules! m4_with_x { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec + where + for<'a> &'a [Ty]: $trait, + Tx: Num + FromPrimitive + AsPrimitive, + Ty: Copy + PartialOrd, + { + assert_eq!(n_out % 4, 0); + let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4); + m4_generic_with_x(arr, bin_idx_iterator, n_out, $func) + } + }; } +m4_with_x!(m4_with_x, ArgMinMax, |arr| arr.argminmax()); +m4_with_x!(m4_with_x_nan, NaNArgMinMax, |arr| arr.nanargminmax()); + // ----------- WITHOUT X -pub fn m4_without_x(arr: &[T], n_out: usize) -> Vec -where - for<'a> &'a [T]: ArgMinMax, -{ - assert_eq!(n_out % 4, 0); - m4_generic(arr, n_out, |arr| arr.argminmax()) +macro_rules! 
m4_without_x { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name(arr: &[T], n_out: usize) -> Vec + where + for<'a> &'a [T]: $trait, + { + assert_eq!(n_out % 4, 0); + m4_generic(arr, n_out, $func) + } + }; } +m4_without_x!(m4_without_x, ArgMinMax, |arr| arr.argminmax()); +m4_without_x!(m4_without_x_nan, NaNArgMinMax, |arr| arr.nanargminmax()); + // ------------------------------------- PARALLEL -------------------------------------- // ----------- WITH X -pub fn m4_with_x_parallel( - x: &[Tx], - arr: &[Ty], - n_out: usize, - n_threads: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, - Ty: Copy + PartialOrd + Send + Sync, -{ - assert_eq!(n_out % 4, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4, n_threads); - m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| { - arr.argminmax() - }) +macro_rules! m4_with_x_parallel { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name( + x: &[Tx], + arr: &[Ty], + n_out: usize, + n_threads: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, + Ty: Copy + PartialOrd + Send + Sync, + { + assert_eq!(n_out % 4, 0); + let bin_idx_iterator = + get_equidistant_bin_idx_iterator_parallel(x, n_out / 4, n_threads); + m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, $func) + } + }; } +m4_with_x_parallel!(m4_with_x_parallel, ArgMinMax, |arr| arr.argminmax()); +m4_with_x_parallel!(m4_with_x_parallel_nan, NaNArgMinMax, |arr| arr + .nanargminmax()); + // ----------- WITHOUT X -pub fn m4_without_x_parallel( - arr: &[T], - n_out: usize, - n_threads: usize, -) -> Vec -where - for<'a> &'a [T]: ArgMinMax, -{ - assert_eq!(n_out % 4, 0); - m4_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax()) +macro_rules! 
m4_without_x_parallel { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name( + arr: &[T], + n_out: usize, + n_threads: usize, + ) -> Vec + where + for<'a> &'a [T]: $trait, + { + assert_eq!(n_out % 4, 0); + m4_generic_parallel(arr, n_out, n_threads, $func) + } + }; } +m4_without_x_parallel!(m4_without_x_parallel, ArgMinMax, |arr| arr.argminmax()); +m4_without_x_parallel!(m4_without_x_parallel_nan, NaNArgMinMax, |arr| arr + .nanargminmax()); + // TODO: check for duplicate data in the output array // -> In the current implementation we always add 4 datapoints per bin (if of // course the bin has >= 4 datapoints). However, the argmin and argmax might From 4209d6e9a2d621585938d501a600ebe69276b802 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Wed, 2 Aug 2023 16:39:08 +0200 Subject: [PATCH 02/26] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20nan=20implement?= =?UTF-8?q?ation=20of=20minmax=20algorithm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- downsample_rs/src/minmax.rs | 115 +++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 42 deletions(-) diff --git a/downsample_rs/src/minmax.rs b/downsample_rs/src/minmax.rs index 6858164..36df568 100644 --- a/downsample_rs/src/minmax.rs +++ b/downsample_rs/src/minmax.rs @@ -1,7 +1,7 @@ use rayon::iter::IndexedParallelIterator; use rayon::prelude::*; -use argminmax::ArgMinMax; +use argminmax::{ArgMinMax, NaNArgMinMax}; use num_traits::{AsPrimitive, FromPrimitive}; use super::searchsorted::{ @@ -13,62 +13,93 @@ use super::types::Num; // ----------- WITH X -pub fn min_max_with_x(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive, - Ty: Copy + PartialOrd, -{ - assert_eq!(n_out % 2, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2); - min_max_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax()) +macro_rules! 
min_max_with_x { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec + where + for<'a> &'a [Ty]: $trait, + Tx: Num + FromPrimitive + AsPrimitive, + Ty: Copy + PartialOrd, + { + assert_eq!(n_out % 2, 0); + let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2); + min_max_generic_with_x(arr, bin_idx_iterator, n_out, $func) + } + }; } +min_max_with_x!(min_max_with_x, ArgMinMax, |arr| arr.argminmax()); +min_max_with_x!(min_max_with_x_nan, NaNArgMinMax, |arr| arr.nanargminmax()); + // ----------- WITHOUT X -pub fn min_max_without_x(arr: &[T], n_out: usize) -> Vec -where - for<'a> &'a [T]: ArgMinMax, -{ - assert_eq!(n_out % 2, 0); - min_max_generic(arr, n_out, |arr| arr.argminmax()) +macro_rules! min_max_without_x { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name(arr: &[T], n_out: usize) -> Vec + where + for<'a> &'a [T]: $trait, + { + assert_eq!(n_out % 2, 0); + min_max_generic(arr, n_out, $func) + } + }; } +min_max_without_x!(min_max_without_x, ArgMinMax, |arr| arr.argminmax()); +min_max_without_x!(min_max_without_x_nan, NaNArgMinMax, |arr| arr + .nanargminmax()); + // ------------------------------------- PARALLEL -------------------------------------- // ----------- WITH X -pub fn min_max_with_x_parallel( - x: &[Tx], - arr: &[Ty], - n_out: usize, - n_threads: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, - Ty: Copy + PartialOrd + Send + Sync, -{ - assert_eq!(n_out % 2, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2, n_threads); - min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| { - arr.argminmax() - }) +macro_rules! 
min_max_with_x_parallel { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name( + x: &[Tx], + arr: &[Ty], + n_out: usize, + n_threads: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, + Ty: Copy + PartialOrd + Send + Sync, + { + assert_eq!(n_out % 2, 0); + let bin_idx_iterator = + get_equidistant_bin_idx_iterator_parallel(x, n_out / 2, n_threads); + min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, $func) + } + }; } +min_max_with_x_parallel!(min_max_with_x_parallel, ArgMinMax, |arr| arr.argminmax()); +min_max_with_x_parallel!(min_max_with_x_parallel_nan, NaNArgMinMax, |arr| arr + .nanargminmax()); + // ----------- WITHOUT X -pub fn min_max_without_x_parallel( - arr: &[T], - n_out: usize, - n_threads: usize, -) -> Vec -where - for<'a> &'a [T]: ArgMinMax, -{ - assert_eq!(n_out % 2, 0); - min_max_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax()) +macro_rules! min_max_without_x_parallel { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name( + arr: &[T], + n_out: usize, + n_threads: usize, + ) -> Vec + where + for<'a> &'a [T]: $trait, + { + assert_eq!(n_out % 2, 0); + min_max_generic_parallel(arr, n_out, n_threads, $func) + } + }; } + +min_max_without_x_parallel!(min_max_without_x_parallel, ArgMinMax, |arr| arr.argminmax()); +min_max_without_x_parallel!(min_max_without_x_parallel_nan, NaNArgMinMax, |arr| arr + .nanargminmax()); + // ----------------- GENERICS // // --------------------- WITHOUT X From 5199d87bd776a5af8708445c7e63daf0ad536d9b Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Wed, 2 Aug 2023 16:49:57 +0200 Subject: [PATCH 03/26] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20nan=20implement?= =?UTF-8?q?ation=20of=20minmaxlttb=20algorithm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- downsample_rs/src/minmaxlttb.rs | 196 ++++++++++++++++++++------------ 1 file changed, 124 insertions(+), 
72 deletions(-) diff --git a/downsample_rs/src/minmaxlttb.rs b/downsample_rs/src/minmaxlttb.rs index 9a0b708..3f78412 100644 --- a/downsample_rs/src/minmaxlttb.rs +++ b/downsample_rs/src/minmaxlttb.rs @@ -1,4 +1,4 @@ -use argminmax::ArgMinMax; +use argminmax::{ArgMinMax, NaNArgMinMax}; use super::lttb::{lttb_with_x, lttb_without_x}; use super::types::Num; @@ -10,91 +10,143 @@ use num_traits::{AsPrimitive, FromPrimitive}; // ----------- WITH X -pub fn minmaxlttb_with_x + FromPrimitive, Ty: Num + AsPrimitive>( - x: &[Tx], - y: &[Ty], - n_out: usize, - minmax_ratio: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, -{ - minmaxlttb_generic( - x, - y, - n_out, - minmax_ratio, - None, - MinMaxFunctionWithX::Serial(minmax::min_max_with_x), - ) +macro_rules! minmaxlttb_with_x { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name + FromPrimitive, Ty: Num + AsPrimitive>( + x: &[Tx], + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + { + minmaxlttb_generic( + x, + y, + n_out, + minmax_ratio, + None, + MinMaxFunctionWithX::Serial($func), + ) + } + }; } +minmaxlttb_with_x!(minmaxlttb_with_x, ArgMinMax, minmax::min_max_with_x); +minmaxlttb_with_x!( + minmaxlttb_with_x_nan, + NaNArgMinMax, + minmax::min_max_with_x_nan +); + // ----------- WITHOUT X -pub fn minmaxlttb_without_x>( - y: &[Ty], - n_out: usize, - minmax_ratio: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, -{ - minmaxlttb_generic_without_x( - y, - n_out, - minmax_ratio, - None, - MinMaxFunctionWithoutX::Serial(minmax::min_max_without_x), - ) +macro_rules! 
minmaxlttb_without_x { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name>( + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + { + minmaxlttb_generic_without_x( + y, + n_out, + minmax_ratio, + None, + MinMaxFunctionWithoutX::Serial($func), + ) + } + }; } +minmaxlttb_without_x!(minmaxlttb_without_x, ArgMinMax, minmax::min_max_without_x); +minmaxlttb_without_x!( + minmaxlttb_without_x_nan, + NaNArgMinMax, + minmax::min_max_without_x_nan +); + // ------------------------------------- PARALLEL -------------------------------------- // ----------- WITH X -pub fn minmaxlttb_with_x_parallel< - Tx: Num + AsPrimitive + FromPrimitive + Send + Sync, - Ty: Num + AsPrimitive + Send + Sync, ->( - x: &[Tx], - y: &[Ty], - n_out: usize, - minmax_ratio: usize, - n_threads: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, -{ - minmaxlttb_generic( - x, - y, - n_out, - minmax_ratio, - Some(n_threads), - MinMaxFunctionWithX::Parallel(minmax::min_max_with_x_parallel), - ) +macro_rules! 
minmaxlttb_with_x_parallel { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name< + Tx: Num + AsPrimitive + FromPrimitive + Send + Sync, + Ty: Num + AsPrimitive + Send + Sync, + >( + x: &[Tx], + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + n_threads: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + { + minmaxlttb_generic( + x, + y, + n_out, + minmax_ratio, + Some(n_threads), + MinMaxFunctionWithX::Parallel($func), + ) + } + }; } +minmaxlttb_with_x_parallel!( + minmaxlttb_with_x_parallel, + ArgMinMax, + minmax::min_max_with_x_parallel +); +minmaxlttb_with_x_parallel!( + minmaxlttb_with_x_parallel_nan, + NaNArgMinMax, + minmax::min_max_with_x_parallel_nan +); + // ----------- WITHOUT X -pub fn minmaxlttb_without_x_parallel + Send + Sync>( - y: &[Ty], - n_out: usize, - minmax_ratio: usize, - n_threads: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, -{ - minmaxlttb_generic_without_x( - y, - n_out, - minmax_ratio, - Some(n_threads), - MinMaxFunctionWithoutX::Parallel(minmax::min_max_without_x_parallel), - ) +macro_rules! 
minmaxlttb_without_x_parallel { + ($func_name:ident, $trait:path, $func:expr) => { + pub fn $func_name + Send + Sync>( + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + n_threads: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + { + minmaxlttb_generic_without_x( + y, + n_out, + minmax_ratio, + Some(n_threads), + MinMaxFunctionWithoutX::Parallel($func), + ) + } + }; } +minmaxlttb_without_x_parallel!( + minmaxlttb_without_x_parallel, + ArgMinMax, + minmax::min_max_without_x_parallel +); +minmaxlttb_without_x_parallel!( + minmaxlttb_without_x_parallel_nan, + NaNArgMinMax, + minmax::min_max_without_x_parallel_nan +); + // ----------------------------------- GENERICS ------------------------------------ // types to make function signatures easier to read @@ -121,7 +173,7 @@ pub(crate) fn minmaxlttb_generic, Ty: Num + AsPrimiti f_minmax: MinMaxFunctionWithX, ) -> Vec where - for<'a> &'a [Ty]: ArgMinMax, + // for<'a> &'a [Ty]: ArgMinMax, { assert_eq!(x.len(), y.len()); assert!(minmax_ratio > 1); @@ -180,7 +232,7 @@ pub(crate) fn minmaxlttb_generic_without_x>( f_minmax: MinMaxFunctionWithoutX, ) -> Vec where - for<'a> &'a [Ty]: ArgMinMax, + // for<'a> &'a [Ty]: ArgMinMax, { assert!(minmax_ratio > 1); // Apply first min max aggregation (if above ratio) From 496def7f76568b52ab30a2907f012dd55d441317 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 09:46:41 +0200 Subject: [PATCH 04/26] =?UTF-8?q?=F0=9F=92=A9=20feat:=20update=20lib=20scr?= =?UTF-8?q?ipt=20to=20incorporate=20nan-handling=20functions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib.rs | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 5a93afc..b7513c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -98,6 +98,14 @@ macro_rules! 
_create_pyfuncs_without_x_generic { } )* }; + + (@nan $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($t:ty)*) => { + $( + paste! { + $create_macro!([], $resample_mod, $resample_fn, $t, $mod); + } + )* + }; } // With x-range @@ -216,12 +224,46 @@ macro_rules! _create_pyfuncs_with_x_generic { // and https://users.rust-lang.org/t/tail-recursive-macros/905/3 } +// TODO: there must be a better way to combine normal and nan macros +macro_rules! _create_nan_pyfuncs_with_x_generic { + // ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($t:ty)+) => { + // // The macro will implement the function for all combinations of $t (for type x and y). + // // (duplicate the list of types to iterate over all combinations) + // _create_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($t)+; $($t),+); + // }; + + ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+, $($ty:ty)+) => { + // The macro will implement the function for all combinations of $tx and $ty (for respectively type x and y). + _create_nan_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty),+); + }; + + // Base case: there is only one type (for y) left + (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty:ty) => { + $( + paste! { + $create_macro!([], $resample_mod, $resample_fn, $tx, $ty, $mod); + } + )* + }; + // The head/tail recursion: pick the first element -> apply the base case, and recurse over the rest. 
+ (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty_head:ty, $($ty_rest:ty),+) => { + _create_nan_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $ty_head); + _create_nan_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty_rest),+); + }; + + // Huge thx to https://stackoverflow.com/a/54552848 + // and https://users.rust-lang.org/t/tail-recursive-macros/905/3 +} // ------ Main macros ------ macro_rules! _create_pyfuncs_without_x_helper { ($pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_without_x_generic!($pyfunc_fn, $resample_mod, $resample_fn, $mod, f16 f32 f64 i8 i16 i32 i64 u8 u16 u32 u64); }; + + (@nan $pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_pyfuncs_without_x_generic!(@nan $pyfunc_fn, $resample_mod, $resample_fn, $mod, f16 f32 f64); + }; } macro_rules! create_pyfuncs_without_x { @@ -234,6 +276,14 @@ macro_rules! create_pyfuncs_without_x { $mod ); }; + (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_pyfuncs_without_x_helper!(@nan + _create_pyfunc_without_x, + $resample_mod, + $resample_fn, + $mod + ); + }; (@threaded $resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_without_x_helper!( _create_pyfunc_without_x_multithreaded, @@ -242,6 +292,14 @@ macro_rules! create_pyfuncs_without_x { $mod ); }; + (@nan @threaded $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_pyfuncs_without_x_helper!(@nan + _create_pyfunc_without_x_multithreaded, + $resample_mod, + $resample_fn, + $mod + ); + }; } macro_rules! create_pyfuncs_without_x_with_ratio { @@ -254,6 +312,14 @@ macro_rules! 
create_pyfuncs_without_x_with_ratio { $mod ); }; + (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_pyfuncs_without_x_helper!(@nan + _create_pyfunc_without_x_with_ratio, + $resample_mod, + $resample_fn, + $mod + ); + }; (@threaded $resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_without_x_helper!( _create_pyfunc_without_x_with_ratio_multithreaded, @@ -262,12 +328,23 @@ macro_rules! create_pyfuncs_without_x_with_ratio { $mod ); }; + (@nan @threaded $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_pyfuncs_without_x_helper!(@nan + _create_pyfunc_without_x_with_ratio_multithreaded, + $resample_mod, + $resample_fn, + $mod + ); + }; } macro_rules! _create_pyfuncs_with_x_helper { ($pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_with_x_generic!($pyfunc_fn, $resample_mod, $resample_fn, $mod, f32 f64 i16 i32 i64 u16 u32 u64, f16 f32 f64 i8 i16 i32 i64 u8 u16 u32 u64); }; + (@nan $pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_nan_pyfuncs_with_x_generic!($pyfunc_fn, $resample_mod, $resample_fn, $mod, f32 f64 i16 i32 i64 u16 u32 u64, f16 f32 f64); + }; } macro_rules! create_pyfuncs_with_x { @@ -275,6 +352,9 @@ macro_rules! create_pyfuncs_with_x { ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_with_x_helper!(_create_pyfunc_with_x, $resample_mod, $resample_fn, $mod); }; + (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_pyfuncs_with_x_helper!(@nan _create_pyfunc_with_x, $resample_mod, $resample_fn, $mod); + }; (@threaded $resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_with_x_helper!( _create_pyfunc_with_x_multithreaded, @@ -283,6 +363,14 @@ macro_rules! 
create_pyfuncs_with_x { $mod ); }; + (@nan @threaded $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_pyfuncs_with_x_helper!(@nan + _create_pyfunc_with_x_multithreaded, + $resample_mod, + $resample_fn, + $mod + ); + }; } macro_rules! create_pyfuncs_with_x_with_ratio { @@ -295,6 +383,14 @@ macro_rules! create_pyfuncs_with_x_with_ratio { $mod ); }; + (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_pyfuncs_with_x_helper!(@nan + _create_pyfunc_with_x_with_ratio, + $resample_mod, + $resample_fn, + $mod + ); + }; (@threaded $resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_with_x_helper!( _create_pyfunc_with_x_with_ratio_multithreaded, @@ -303,6 +399,14 @@ macro_rules! create_pyfuncs_with_x_with_ratio { $mod ); }; + (@nan @threaded $resample_mod:ident, $resample_fn:ident, $mod:ident) => { + _create_pyfuncs_with_x_helper!(@nan + _create_pyfunc_with_x_with_ratio_multithreaded, + $resample_mod, + $resample_fn, + $mod + ); + }; } // -------------------------------------- MINMAX --------------------------------------- @@ -319,11 +423,13 @@ fn minmax(_py: Python<'_>, m: &PyModule) -> PyResult<()> { // ----- WITHOUT X { create_pyfuncs_without_x!(minmax_mod, min_max_without_x, sequential_mod); + create_pyfuncs_without_x!(@nan minmax_mod, min_max_without_x_nan, sequential_mod); } // ----- WITH X { create_pyfuncs_with_x!(minmax_mod, min_max_with_x, sequential_mod); + create_pyfuncs_with_x!(@nan minmax_mod, min_max_with_x_nan, sequential_mod); } // ----------------- PARALLEL @@ -333,11 +439,13 @@ fn minmax(_py: Python<'_>, m: &PyModule) -> PyResult<()> { // ----- WITHOUT X { create_pyfuncs_without_x!(@threaded minmax_mod, min_max_without_x_parallel, parallel_mod); + create_pyfuncs_without_x!(@nan @threaded minmax_mod, min_max_without_x_parallel_nan, parallel_mod); } // ----- WITH X { create_pyfuncs_with_x!(@threaded minmax_mod, min_max_with_x_parallel, parallel_mod); + create_pyfuncs_with_x!(@nan @threaded 
minmax_mod, min_max_with_x_parallel_nan, parallel_mod); } // Add the sub modules to the module @@ -361,11 +469,13 @@ fn m4(_py: Python, m: &PyModule) -> PyResult<()> { // ----- WITHOUT X { create_pyfuncs_without_x!(m4_mod, m4_without_x, sequential_mod); + create_pyfuncs_without_x!(@nan m4_mod, m4_without_x_nan, sequential_mod); } // ----- WITH X { create_pyfuncs_with_x!(m4_mod, m4_with_x, sequential_mod); + create_pyfuncs_with_x!(@nan m4_mod, m4_with_x_nan, sequential_mod); } // ----------------- PARALLEL @@ -375,11 +485,13 @@ fn m4(_py: Python, m: &PyModule) -> PyResult<()> { // ----- WITHOUT X { create_pyfuncs_without_x!(@threaded m4_mod, m4_without_x_parallel, parallel_mod); + create_pyfuncs_without_x!(@nan @threaded m4_mod, m4_without_x_parallel, parallel_mod); } // ----- WITH X { create_pyfuncs_with_x!(@threaded m4_mod, m4_with_x_parallel, parallel_mod); + create_pyfuncs_with_x!(@nan @threaded m4_mod, m4_with_x_parallel, parallel_mod); } // Add the sub modules to the module @@ -431,11 +543,13 @@ fn minmaxlttb(_py: Python, m: &PyModule) -> PyResult<()> { // ----- WITHOUT X { create_pyfuncs_without_x_with_ratio!(minmaxlttb_mod, minmaxlttb_without_x, sequential_mod); + create_pyfuncs_without_x_with_ratio!(@nan minmaxlttb_mod, minmaxlttb_without_x_nan, sequential_mod); } // ----- WITH X { create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_with_x, sequential_mod); + create_pyfuncs_with_x_with_ratio!(@nan minmaxlttb_mod, minmaxlttb_with_x_nan, sequential_mod); } // ----------------- PARALLEL @@ -449,6 +563,11 @@ fn minmaxlttb(_py: Python, m: &PyModule) -> PyResult<()> { minmaxlttb_without_x_parallel, parallel_mod ); + create_pyfuncs_without_x_with_ratio!(@nan @threaded + minmaxlttb_mod, + minmaxlttb_without_x_parallel, + parallel_mod + ); } // ----- WITH X @@ -458,6 +577,11 @@ fn minmaxlttb(_py: Python, m: &PyModule) -> PyResult<()> { minmaxlttb_with_x_parallel, parallel_mod ); + create_pyfuncs_with_x_with_ratio!(@nan @threaded + minmaxlttb_mod, + 
minmaxlttb_with_x_parallel, + parallel_mod + ); } // Add the submodules to the module From 3b51b159b8169e1bcd37ef85d49ebd96a98e862e Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 10:06:02 +0200 Subject: [PATCH 05/26] =?UTF-8?q?=F0=9F=9A=A7=20feat:=20add=20new=20nan=20?= =?UTF-8?q?downsampler?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tsdownsample/downsampling_interface.py | 163 +++++++++++++++++++++++++ 1 file changed, 163 insertions(+) diff --git a/tsdownsample/downsampling_interface.py b/tsdownsample/downsampling_interface.py index de20d46..34a8f78 100644 --- a/tsdownsample/downsampling_interface.py +++ b/tsdownsample/downsampling_interface.py @@ -362,3 +362,166 @@ def __deepcopy__(self, memo): else: setattr(result, k, deepcopy(v, memo)) return result + +_nan_y_rust_dtypes = ["float16", "float32", "float64"] +NAN_DOWNSAMPLE_F = "downsample_nan" + +class AbstractRustNaNDownsampler(AbstractRustDownsampler, ABC): + """RustNaNDownsampler interface-class, subclassed by concrete downsamplers.""" + + def __init__(self): + super().__init__() # same for x and y + + # overwrite supported y dtypes, as only floats are supported for nan-handling + self.y_dtype_regex_list = _nan_y_rust_dtypes + + @staticmethod + def _switch_mod_with_y( + y_dtype: np.dtype, mod: ModuleType, downsample_func: str = NAN_DOWNSAMPLE_F + ) -> Callable: + """The x-data is not considered in the downsampling + + Assumes equal binning. + + Parameters + ---------- + y_dtype : np.dtype + The dtype of the y-data + mod : ModuleType + The module to select the appropriate function from + downsample_func : str, optional + The name of the function to use, by default DOWNSAMPLE_FUNC. 
+ """ + # FLOATS + if np.issubdtype(y_dtype, np.floating): + if y_dtype == np.float16: + return getattr(mod, downsample_func + "_f16") + elif y_dtype == np.float32: + return getattr(mod, downsample_func + "_f32") + elif y_dtype == np.float64: + return getattr(mod, downsample_func + "_f64") + raise ValueError(f"Unsupported data type (for y): {y_dtype}") + + @staticmethod + def _switch_mod_with_x_and_y( + x_dtype: np.dtype, y_dtype: np.dtype, mod: ModuleType + ) -> Callable: + """The x-data is considered in the downsampling + + Assumes equal binning. + + Parameters + ---------- + x_dtype : np.dtype + The dtype of the x-data + y_dtype : np.dtype + The dtype of the y-data + mod : ModuleType + The module to select the appropriate function from + """ + # FLOATS + if np.issubdtype(x_dtype, np.floating): + if x_dtype == np.float16: + return AbstractRustDownsampler._switch_mod_with_y( + y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_f16" + ) + elif x_dtype == np.float32: + return AbstractRustDownsampler._switch_mod_with_y( + y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_f32" + ) + elif x_dtype == np.float64: + return AbstractRustDownsampler._switch_mod_with_y( + y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_f64" + ) + # UINTS + elif np.issubdtype(x_dtype, np.unsignedinteger): + if x_dtype == np.uint16: + return AbstractRustDownsampler._switch_mod_with_y( + y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_u16" + ) + elif x_dtype == np.uint32: + return AbstractRustDownsampler._switch_mod_with_y( + y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_u32" + ) + elif x_dtype == np.uint64: + return AbstractRustDownsampler._switch_mod_with_y( + y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_u64" + ) + # INTS (need to be last because uint is subdtype of int) + elif np.issubdtype(x_dtype, np.integer): + if x_dtype == np.int16: + return AbstractRustDownsampler._switch_mod_with_y( + y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_i16" + ) + elif x_dtype == np.int32: + return AbstractRustDownsampler._switch_mod_with_y( + y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_i32" + ) + elif 
x_dtype == np.int64: + return AbstractRustDownsampler._switch_mod_with_y( + y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_i64" + ) + # DATETIME -> i64 (datetime64 is viewed as int64) + # TIMEDELTA -> i64 (timedelta64 is viewed as int64) + raise ValueError(f"Unsupported data type (for x): {x_dtype}") + + def _downsample( + self, + x: Union[np.ndarray, None], + y: np.ndarray, + n_out: int, + n_threads: int = 1, + **kwargs, + ) -> np.ndarray: + """Downsample the data in x and y.""" + mod = self.mod_single_core + is_multi_core = False + parallel = n_threads > 1 + if parallel: + if self.mod_multi_core is None: + name = self.__class__.__name__ + warnings.warn( + f"No parallel implementation available for {name}. " + "Falling back to single-core implementation." + ) + else: + mod = self.mod_multi_core + is_multi_core = True + ## Viewing the x-data as different dtype (if necessary) + if x is None: + downsample_f = self._switch_mod_with_y(y.dtype, mod) + if is_multi_core: + return downsample_f(y, n_out, n_threads=n_threads, **kwargs) + else: + return downsample_f(y, n_out, **kwargs) + elif np.issubdtype(x.dtype, np.datetime64): + # datetime64 is viewed as int64 + x = x.view(dtype=np.int64) + elif np.issubdtype(x.dtype, np.timedelta64): + # timedelta64 is viewed as int64 + x = x.view(dtype=np.int64) + ## Getting the appropriate downsample function + downsample_f = self._switch_mod_with_x_and_y(x.dtype, y.dtype, mod) + if is_multi_core: + return downsample_f(x, y, n_out, n_threads=n_threads, **kwargs) + else: + return downsample_f(x, y, n_out, **kwargs) + + def downsample( + self, *args, n_out: int, n_threads: int = 1, **kwargs # x and y are optional + ): + """Downsample the data in x and y.""" + return super().downsample(*args, n_out=n_out, n_threads=n_threads, **kwargs) + + def __deepcopy__(self, memo): + """Deepcopy the object.""" + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + for k, v in self.__dict__.items(): + if k.endswith("_mod") or 
k.startswith("mod_"): + # Don't (deep)copy the compiled modules + setattr(result, k, v) + else: + setattr(result, k, deepcopy(v, memo)) + return result From 33c52e37b89e421486e306925dfbb204a3f6743d Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 10:14:37 +0200 Subject: [PATCH 06/26] =?UTF-8?q?=E2=9C=85=20tests:=20add=20new=20nan=20fu?= =?UTF-8?q?nctions=20to=20rust=20mod=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_config.py | 2 ++ tests/test_rust_mods.py | 30 +++++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 3ab5f41..e1a6d31 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -17,8 +17,10 @@ supported_dtypes_x = _core_supported_dtypes supported_dtypes_y = _core_supported_dtypes + [np.float16, np.int8, np.uint8, np.bool_] +supported_dtypes_y_nan = [np.float16, np.float32, np.float64] _core_rust_primitive_types = ["f32", "f64", "i16", "i32", "i64", "u16", "u32", "u64"] rust_primitive_types_x = _core_rust_primitive_types rust_primitive_types_y = _core_rust_primitive_types + ["f16", "i8", "u8"] +rust_primitive_types_y_nan = ["f16", "f32", "f64"] diff --git a/tests/test_rust_mods.py b/tests/test_rust_mods.py index a6532d3..950a3a4 100644 --- a/tests/test_rust_mods.py +++ b/tests/test_rust_mods.py @@ -1,4 +1,8 @@ -from test_config import rust_primitive_types_x, rust_primitive_types_y +from test_config import ( + rust_primitive_types_x, + rust_primitive_types_y, + rust_primitive_types_y_nan, +) import tsdownsample._rust._tsdownsample_rs as tsds_rs @@ -20,17 +24,36 @@ def _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl: bool): for ty in rust_primitive_types_y: assert hasattr(m, f"downsample_{tx}_{ty}") +def _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl: bool): + # without x + for sub_mod in sub_mods: + assert hasattr(mod, sub_mod) + m = getattr(mod, sub_mod) 
+ for ty in rust_primitive_types_y_nan: + assert hasattr(m, f"downsample_nan_{ty}") + + # with x + if not has_x_impl: + return + for sub_mod in sub_mods: + assert hasattr(mod, sub_mod) + m = getattr(mod, sub_mod) + for tx in rust_primitive_types_x: + for ty in rust_primitive_types_y_nan: + assert hasattr(m, f"downsample_{tx}_{ty}") def test_minmax_rust_mod_correctly_build(): mod = tsds_rs.minmax sub_mods = ["sequential", "parallel"] - _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=False) + _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) + _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl=True) def test_m4_rust_mod_correctly_build(): mod = tsds_rs.m4 sub_mods = ["sequential", "parallel"] - _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=False) + _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) + _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl=True) def test_lttb_rust_mod_correctly_build(): @@ -43,3 +66,4 @@ def test_minmaxlttb_rust_mod_correctly_build(): mod = tsds_rs.minmaxlttb sub_mods = ["sequential", "parallel"] _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) + _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl=True) From 591bdd795d177c15fce471fe7eac5e76e14e29a8 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 10:15:02 +0200 Subject: [PATCH 07/26] =?UTF-8?q?=F0=9F=8E=A8=20chore:=20format=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_rust_mods.py | 2 ++ tsdownsample/downsampling_interface.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/test_rust_mods.py b/tests/test_rust_mods.py index 950a3a4..8a50ab8 100644 --- a/tests/test_rust_mods.py +++ b/tests/test_rust_mods.py @@ -24,6 +24,7 @@ def _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl: bool): for ty in rust_primitive_types_y: assert hasattr(m, f"downsample_{tx}_{ty}") + def 
_test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl: bool): # without x for sub_mod in sub_mods: @@ -42,6 +43,7 @@ def _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl: bool): for ty in rust_primitive_types_y_nan: assert hasattr(m, f"downsample_{tx}_{ty}") + def test_minmax_rust_mod_correctly_build(): mod = tsds_rs.minmax sub_mods = ["sequential", "parallel"] diff --git a/tsdownsample/downsampling_interface.py b/tsdownsample/downsampling_interface.py index 34a8f78..e77f9a3 100644 --- a/tsdownsample/downsampling_interface.py +++ b/tsdownsample/downsampling_interface.py @@ -363,9 +363,11 @@ def __deepcopy__(self, memo): setattr(result, k, deepcopy(v, memo)) return result + _nan_y_rust_dtypes = ["float16", "float32", "float64"] NAN_DOWNSAMPLE_F = "downsample_nan" + class AbstractRustNaNDownsampler(AbstractRustDownsampler, ABC): """RustNaNDownsampler interface-class, subclassed by concrete downsamplers.""" From acf1ff3a1f7972a28b53843776a4c7707cb797a3 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 10:43:42 +0200 Subject: [PATCH 08/26] =?UTF-8?q?=E2=9C=A8=20feat:=20expose=20new=20nan=20?= =?UTF-8?q?downsamplers=20to=20api?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tsdownsample/__init__.py | 6 ++++++ tsdownsample/downsamplers.py | 39 +++++++++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/tsdownsample/__init__.py b/tsdownsample/__init__.py index 32188b0..fb66296 100644 --- a/tsdownsample/__init__.py +++ b/tsdownsample/__init__.py @@ -6,6 +6,9 @@ M4Downsampler, MinMaxDownsampler, MinMaxLTTBDownsampler, + NaNM4Downsampler, + NanMinMaxDownsampler, + NaNMinMaxLTTBDownsampler, ) __version__ = "0.1.2" @@ -17,4 +20,7 @@ "M4Downsampler", "LTTBDownsampler", "MinMaxLTTBDownsampler", + "NanMinMaxDownsampler", + "NaNM4Downsampler", + "NaNMinMaxLTTBDownsampler" ] diff --git a/tsdownsample/downsamplers.py b/tsdownsample/downsamplers.py index 
5a8f0e8..65b0744 100644 --- a/tsdownsample/downsamplers.py +++ b/tsdownsample/downsamplers.py @@ -6,7 +6,11 @@ # ------------------ Rust Downsamplers ------------------ from tsdownsample._rust import _tsdownsample_rs # type: ignore[attr-defined] -from .downsampling_interface import AbstractDownsampler, AbstractRustDownsampler +from .downsampling_interface import ( + AbstractDownsampler, + AbstractRustDownsampler, + AbstractRustNaNDownsampler, +) class MinMaxDownsampler(AbstractRustDownsampler): @@ -20,6 +24,17 @@ def _check_valid_n_out(n_out: int): if n_out % 2 != 0: raise ValueError("n_out must be even") +class NanMinMaxDownsampler(AbstractRustNaNDownsampler): + @property + def rust_mod(self): + return _tsdownsample_rs.minmax + + @staticmethod + def _check_valid_n_out(n_out: int): + AbstractRustDownsampler._check_valid_n_out(n_out) + if n_out % 2 != 0: + raise ValueError("n_out must be even") + class M4Downsampler(AbstractRustDownsampler): @property @@ -32,6 +47,16 @@ def _check_valid_n_out(n_out: int): if n_out % 4 != 0: raise ValueError("n_out must be a multiple of 4") +class NaNM4Downsampler(AbstractRustNaNDownsampler): + @property + def rust_mod(self): + return _tsdownsample_rs.m4 + + @staticmethod + def _check_valid_n_out(n_out: int): + AbstractRustDownsampler._check_valid_n_out(n_out) + if n_out % 4 != 0: + raise ValueError("n_out must be a multiple of 4") class LTTBDownsampler(AbstractRustDownsampler): @property @@ -52,6 +77,18 @@ def downsample( *args, n_out=n_out, n_threads=n_threads, ratio=minmax_ratio ) +class NaNMinMaxLTTBDownsampler(AbstractRustNaNDownsampler): + @property + def rust_mod(self): + return _tsdownsample_rs.minmaxlttb + + def downsample( + self, *args, n_out: int, minmax_ratio: int = 30, n_threads: int = 1, **_ + ): + assert minmax_ratio > 0, "minmax_ratio must be greater than 0" + return super().downsample( + *args, n_out=n_out, n_threads=n_threads, ratio=minmax_ratio + ) # ------------------ EveryNth Downsampler ------------------ From 
585a889677f485c013fde3f0ff03ac9c290b5ed3 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 11:01:23 +0200 Subject: [PATCH 09/26] =?UTF-8?q?=E2=9C=85=20tests:=20update=20tsdownsampl?= =?UTF-8?q?e=20tests=20to=20support=20nan=20downsamplers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_tsdownsample.py | 44 ++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/tests/test_tsdownsample.py b/tests/test_tsdownsample.py index ab75835..29fe266 100644 --- a/tests/test_tsdownsample.py +++ b/tests/test_tsdownsample.py @@ -2,7 +2,7 @@ import numpy as np import pytest -from test_config import supported_dtypes_x, supported_dtypes_y +from test_config import supported_dtypes_x, supported_dtypes_y, supported_dtypes_y_nan from tsdownsample import ( # MeanDownsampler,; MedianDownsampler, EveryNthDownsampler, @@ -10,6 +10,9 @@ M4Downsampler, MinMaxDownsampler, MinMaxLTTBDownsampler, + NaNM4Downsampler, + NanMinMaxDownsampler, + NaNMinMaxLTTBDownsampler, ) from tsdownsample.downsampling_interface import AbstractDownsampler @@ -24,18 +27,32 @@ MinMaxLTTBDownsampler(), ] -OTHER_DOWNSAMPLERS = [EveryNthDownsampler()] +RUST_NAN_DOWNSAMPLERS = [ + NanMinMaxDownsampler(), + NaNM4Downsampler(), + NaNMinMaxLTTBDownsampler() +] +OTHER_DOWNSAMPLERS = [EveryNthDownsampler()] def generate_rust_downsamplers() -> Iterable[AbstractDownsampler]: - for downsampler in RUST_DOWNSAMPLERS: + for downsampler in RUST_DOWNSAMPLERS + RUST_NAN_DOWNSAMPLERS: yield downsampler def generate_all_downsamplers() -> Iterable[AbstractDownsampler]: - for downsampler in RUST_DOWNSAMPLERS + OTHER_DOWNSAMPLERS: + for downsampler in RUST_DOWNSAMPLERS + RUST_NAN_DOWNSAMPLERS + OTHER_DOWNSAMPLERS: yield downsampler +def is_nan_downsampler(obj): + return obj.__class__.__name__ in [x.__class__.__name__ for x in RUST_NAN_DOWNSAMPLERS] + +def generate_datapoints(obj): + N_DATAPOINTS = 10_000 + if 
is_nan_downsampler(obj): + return np.arange(N_DATAPOINTS, dtype=np.float64) + else: + return np.arange(N_DATAPOINTS) @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) def test_serialization_copy(downsampler: AbstractDownsampler): @@ -45,7 +62,8 @@ def test_serialization_copy(downsampler: AbstractDownsampler): dc = copy(downsampler) ddc = deepcopy(downsampler) - arr = np.arange(10_000) + arr = generate_datapoints(downsampler) + orig_downsampled = downsampler.downsample(arr, n_out=100) dc_downsampled = dc.downsample(arr, n_out=100) ddc_downsampled = ddc.downsample(arr, n_out=100) @@ -60,7 +78,7 @@ def test_serialization_pickle(downsampler: AbstractDownsampler): dc = pickle.loads(pickle.dumps(downsampler)) - arr = np.arange(10_000) + arr = generate_datapoints(downsampler) orig_downsampled = downsampler.downsample(arr, n_out=100) dc_downsampled = dc.downsample(arr, n_out=100) assert np.all(orig_downsampled == dc_downsampled) @@ -69,7 +87,7 @@ def test_serialization_pickle(downsampler: AbstractDownsampler): @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) def test_rust_downsampler(downsampler: AbstractDownsampler): """Test the Rust downsamplers.""" - arr = np.arange(10_000) + arr = generate_datapoints(downsampler) s_downsampled = downsampler.downsample(arr, n_out=100) assert s_downsampled[0] == 0 assert s_downsampled[-1] == len(arr) - 1 @@ -134,7 +152,8 @@ def test_downsampling_different_dtypes(downsampler: AbstractDownsampler): """Test downsampling with different data types.""" arr_orig = np.random.randint(0, 100, size=10_000) res = [] - for dtype_y in supported_dtypes_y: + y_dtypes = supported_dtypes_y_nan if is_nan_downsampler(downsampler) else supported_dtypes_y + for dtype_y in y_dtypes: arr = arr_orig.astype(dtype_y) s_downsampled = downsampler.downsample(arr, n_out=100) if dtype_y is not np.bool_: @@ -148,10 +167,11 @@ def test_downsampling_different_dtypes_with_x(downsampler: AbstractDownsampler): """Test downsampling with 
x with different data types.""" arr_orig = np.random.randint(0, 100, size=10_000) idx_orig = np.arange(len(arr_orig)) + y_dtypes = supported_dtypes_y_nan if is_nan_downsampler(downsampler) else supported_dtypes_y for dtype_x in supported_dtypes_x: res = [] idx = idx_orig.astype(dtype_x) - for dtype_y in supported_dtypes_y: + for dtype_y in y_dtypes: arr = arr_orig.astype(dtype_y) s_downsampled = downsampler.downsample(idx, arr, n_out=100) if dtype_y is not np.bool_: @@ -167,7 +187,8 @@ def test_downsampling_no_out_of_bounds_different_dtypes( """Test no out of bounds issues when downsampling with different data types.""" arr_orig = np.random.randint(0, 100, size=100) res = [] - for dtype in supported_dtypes_y: + y_dtypes = supported_dtypes_y_nan if is_nan_downsampler(downsampler) else supported_dtypes_y + for dtype in y_dtypes: arr = arr_orig.astype(dtype) s_downsampled = downsampler.downsample(arr, n_out=76) s_downsampled_p = downsampler.downsample(arr, n_out=76, n_threads=2) @@ -185,10 +206,11 @@ def test_downsampling_no_out_of_bounds_different_dtypes_with_x( """Test no out of bounds issues when downsampling with different data types.""" arr_orig = np.random.randint(0, 100, size=100) idx_orig = np.arange(len(arr_orig)) + y_dtypes = supported_dtypes_y_nan if is_nan_downsampler(downsampler) else supported_dtypes_y for dtype_x in supported_dtypes_x: res = [] idx = idx_orig.astype(dtype_x) - for dtype_y in supported_dtypes_y: + for dtype_y in y_dtypes: arr = arr_orig.astype(dtype_y) s_downsampled = downsampler.downsample(idx, arr, n_out=76) s_downsampled_p = downsampler.downsample(idx, arr, n_out=76, n_threads=2) From 224c0d906dd55db6dd88298b0ac0efb0841bf61c Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 11:08:36 +0200 Subject: [PATCH 10/26] =?UTF-8?q?=F0=9F=8E=A8=20chore:=20format=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_tsdownsample.py | 34 ++++++++++++++++++++++++++++------ 
tsdownsample/__init__.py | 2 +- tsdownsample/downsamplers.py | 5 +++++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/tests/test_tsdownsample.py b/tests/test_tsdownsample.py index 29fe266..eb38246 100644 --- a/tests/test_tsdownsample.py +++ b/tests/test_tsdownsample.py @@ -30,11 +30,12 @@ RUST_NAN_DOWNSAMPLERS = [ NanMinMaxDownsampler(), NaNM4Downsampler(), - NaNMinMaxLTTBDownsampler() + NaNMinMaxLTTBDownsampler(), ] OTHER_DOWNSAMPLERS = [EveryNthDownsampler()] + def generate_rust_downsamplers() -> Iterable[AbstractDownsampler]: for downsampler in RUST_DOWNSAMPLERS + RUST_NAN_DOWNSAMPLERS: yield downsampler @@ -44,8 +45,12 @@ def generate_all_downsamplers() -> Iterable[AbstractDownsampler]: for downsampler in RUST_DOWNSAMPLERS + RUST_NAN_DOWNSAMPLERS + OTHER_DOWNSAMPLERS: yield downsampler + def is_nan_downsampler(obj): - return obj.__class__.__name__ in [x.__class__.__name__ for x in RUST_NAN_DOWNSAMPLERS] + return obj.__class__.__name__ in [ + x.__class__.__name__ for x in RUST_NAN_DOWNSAMPLERS + ] + def generate_datapoints(obj): N_DATAPOINTS = 10_000 @@ -54,6 +59,7 @@ def generate_datapoints(obj): else: return np.arange(N_DATAPOINTS) + @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) def test_serialization_copy(downsampler: AbstractDownsampler): """Test serialization.""" @@ -152,7 +158,11 @@ def test_downsampling_different_dtypes(downsampler: AbstractDownsampler): """Test downsampling with different data types.""" arr_orig = np.random.randint(0, 100, size=10_000) res = [] - y_dtypes = supported_dtypes_y_nan if is_nan_downsampler(downsampler) else supported_dtypes_y + y_dtypes = ( + supported_dtypes_y_nan + if is_nan_downsampler(downsampler) + else supported_dtypes_y + ) for dtype_y in y_dtypes: arr = arr_orig.astype(dtype_y) s_downsampled = downsampler.downsample(arr, n_out=100) @@ -167,7 +177,11 @@ def test_downsampling_different_dtypes_with_x(downsampler: AbstractDownsampler): """Test downsampling with x with different data 
types.""" arr_orig = np.random.randint(0, 100, size=10_000) idx_orig = np.arange(len(arr_orig)) - y_dtypes = supported_dtypes_y_nan if is_nan_downsampler(downsampler) else supported_dtypes_y + y_dtypes = ( + supported_dtypes_y_nan + if is_nan_downsampler(downsampler) + else supported_dtypes_y + ) for dtype_x in supported_dtypes_x: res = [] idx = idx_orig.astype(dtype_x) @@ -187,7 +201,11 @@ def test_downsampling_no_out_of_bounds_different_dtypes( """Test no out of bounds issues when downsampling with different data types.""" arr_orig = np.random.randint(0, 100, size=100) res = [] - y_dtypes = supported_dtypes_y_nan if is_nan_downsampler(downsampler) else supported_dtypes_y + y_dtypes = ( + supported_dtypes_y_nan + if is_nan_downsampler(downsampler) + else supported_dtypes_y + ) for dtype in y_dtypes: arr = arr_orig.astype(dtype) s_downsampled = downsampler.downsample(arr, n_out=76) @@ -206,7 +224,11 @@ def test_downsampling_no_out_of_bounds_different_dtypes_with_x( """Test no out of bounds issues when downsampling with different data types.""" arr_orig = np.random.randint(0, 100, size=100) idx_orig = np.arange(len(arr_orig)) - y_dtypes = supported_dtypes_y_nan if is_nan_downsampler(downsampler) else supported_dtypes_y + y_dtypes = ( + supported_dtypes_y_nan + if is_nan_downsampler(downsampler) + else supported_dtypes_y + ) for dtype_x in supported_dtypes_x: res = [] idx = idx_orig.astype(dtype_x) diff --git a/tsdownsample/__init__.py b/tsdownsample/__init__.py index fb66296..14630ef 100644 --- a/tsdownsample/__init__.py +++ b/tsdownsample/__init__.py @@ -22,5 +22,5 @@ "MinMaxLTTBDownsampler", "NanMinMaxDownsampler", "NaNM4Downsampler", - "NaNMinMaxLTTBDownsampler" + "NaNMinMaxLTTBDownsampler", ] diff --git a/tsdownsample/downsamplers.py b/tsdownsample/downsamplers.py index 65b0744..9bca439 100644 --- a/tsdownsample/downsamplers.py +++ b/tsdownsample/downsamplers.py @@ -24,6 +24,7 @@ def _check_valid_n_out(n_out: int): if n_out % 2 != 0: raise ValueError("n_out must 
be even") + class NanMinMaxDownsampler(AbstractRustNaNDownsampler): @property def rust_mod(self): @@ -47,6 +48,7 @@ def _check_valid_n_out(n_out: int): if n_out % 4 != 0: raise ValueError("n_out must be a multiple of 4") + class NaNM4Downsampler(AbstractRustNaNDownsampler): @property def rust_mod(self): @@ -58,6 +60,7 @@ def _check_valid_n_out(n_out: int): if n_out % 4 != 0: raise ValueError("n_out must be a multiple of 4") + class LTTBDownsampler(AbstractRustDownsampler): @property def rust_mod(self): @@ -77,6 +80,7 @@ def downsample( *args, n_out=n_out, n_threads=n_threads, ratio=minmax_ratio ) + class NaNMinMaxLTTBDownsampler(AbstractRustNaNDownsampler): @property def rust_mod(self): @@ -90,6 +94,7 @@ def downsample( *args, n_out=n_out, n_threads=n_threads, ratio=minmax_ratio ) + # ------------------ EveryNth Downsampler ------------------ From 448689449b766cd5d5292120d265e9521da2daa3 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 12:46:17 +0200 Subject: [PATCH 11/26] =?UTF-8?q?=E2=9C=85=20tests:=20add=20test=20for=20n?= =?UTF-8?q?an=20downsamplers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_tsdownsample.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/test_tsdownsample.py b/tests/test_tsdownsample.py index eb38246..d21ba64 100644 --- a/tests/test_tsdownsample.py +++ b/tests/test_tsdownsample.py @@ -14,7 +14,10 @@ NanMinMaxDownsampler, NaNMinMaxLTTBDownsampler, ) -from tsdownsample.downsampling_interface import AbstractDownsampler +from tsdownsample.downsampling_interface import ( + AbstractDownsampler, + AbstractRustNaNDownsampler, +) # TODO: Improve tests # - compare implementations with existing plotly_resampler implementations @@ -41,6 +44,11 @@ def generate_rust_downsamplers() -> Iterable[AbstractDownsampler]: yield downsampler +def generate_rust_nan_downsamplers() -> Iterable[AbstractDownsampler]: + for downsampler in 
RUST_NAN_DOWNSAMPLERS: + yield downsampler + + def generate_all_downsamplers() -> Iterable[AbstractDownsampler]: for downsampler in RUST_DOWNSAMPLERS + RUST_NAN_DOWNSAMPLERS + OTHER_DOWNSAMPLERS: yield downsampler @@ -60,6 +68,14 @@ def generate_datapoints(obj): return np.arange(N_DATAPOINTS) +def generate_nan_datapoints(): + N_DATAPOINTS = 10_000 + datapoints = np.arange(N_DATAPOINTS, dtype=np.float64) + datapoints[0] = np.nan + datapoints[9960] = np.nan + return datapoints + + @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) def test_serialization_copy(downsampler: AbstractDownsampler): """Test serialization.""" @@ -99,6 +115,17 @@ def test_rust_downsampler(downsampler: AbstractDownsampler): assert s_downsampled[-1] == len(arr) - 1 +@pytest.mark.parametrize("downsampler", generate_rust_nan_downsamplers()) +def test_rust_nan_downsampler(downsampler: AbstractRustNaNDownsampler): + """Test the Rust NaN downsamplers.""" + datapoints = generate_nan_datapoints() + s_downsampled = downsampler.downsample(datapoints, n_out=100) + print(s_downsampled) + assert s_downsampled[0] == 0 + assert s_downsampled[-2] == 9960 + assert s_downsampled[50] != np.nan + + def test_everynth_downsampler(): """Test EveryNth downsampler.""" arr = np.arange(10_000) From 32b6a262477ddee3e05e3a1a5bf54585e48a6567 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 16:49:49 +0200 Subject: [PATCH 12/26] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20python=20counte?= =?UTF-8?q?rparts=20of=20Rust=20downsamplers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_algos_python_compliance.py | 37 +++++++++++++- tsdownsample/_python/downsamplers.py | 71 +++++++++++++++++++++++++-- 2 files changed, 103 insertions(+), 5 deletions(-) diff --git a/tests/test_algos_python_compliance.py b/tests/test_algos_python_compliance.py index d7995a1..9249b55 100644 --- a/tests/test_algos_python_compliance.py +++ 
b/tests/test_algos_python_compliance.py @@ -1,8 +1,13 @@ import numpy as np import pytest -from tsdownsample import LTTBDownsampler, M4Downsampler, MinMaxDownsampler -from tsdownsample._python.downsamplers import LTTB_py, M4_py, MinMax_py +from tsdownsample import ( + LTTBDownsampler, + M4Downsampler, + MinMaxDownsampler, + NanMinMaxDownsampler, +) +from tsdownsample._python.downsamplers import LTTB_py, M4_py, MinMax_py, NaNMinMax_py @pytest.mark.parametrize( @@ -29,3 +34,31 @@ def test_resampler_accordance(rust_python_pair, n, n_out): rust_downsampler.downsample(x, y, n_out=n_out), python_downsampler.downsample(x, y, n_out=n_out), ) + + +@pytest.mark.parametrize( + "rust_python_pair", + [ + (NanMinMaxDownsampler(), NaNMinMax_py()), + # (NaNM4Downsampler(), NaNM4_py()) + ], +) +@pytest.mark.parametrize("n", [10_000, 10_032, 20_321, 23_489]) +@pytest.mark.parametrize("n_random_nans", [100, 200, 500, 2000, 5000]) +@pytest.mark.parametrize("n_out", [100, 200, 252]) +def test_nan_resampler_accordance(rust_python_pair, n, n_random_nans, n_out): + rust_downsampler, python_downsampler = rust_python_pair + x = np.arange(n) + y = np.random.randn(n) + y[np.random.choice(y.size, n_random_nans, replace=False)] = np.nan + # Without x passed to the rust downsampler + rust_result = rust_downsampler.downsample(y, n_out=n_out) + python_result = python_downsampler.downsample(x, y, n_out=n_out) + print("rust", rust_result) + print("python", python_result) + assert np.allclose(rust_result, python_result) + # With x passed to the rust downsampler + assert np.allclose( + rust_downsampler.downsample(x, y, n_out=n_out), + python_downsampler.downsample(x, y, n_out=n_out), + ) diff --git a/tsdownsample/_python/downsamplers.py b/tsdownsample/_python/downsamplers.py index 864a828..ffe97c9 100644 --- a/tsdownsample/_python/downsamplers.py +++ b/tsdownsample/_python/downsamplers.py @@ -25,7 +25,8 @@ def _get_bin_idxs(x: np.ndarray, nb_bins: int) -> np.ndarray: bins = np.searchsorted(x, 
np.linspace(x[0], x[-1], nb_bins + 1), side="right") bins[0] = 0 bins[-1] = len(x) - return np.unique(bins) + return np.array(bins) + # return np.unique(bins) class LTTB_py(AbstractDownsampler): @@ -144,11 +145,40 @@ def _downsample( if not len(y_slice): continue # calculate the argmin(slice) & argmax(slice) - rel_idxs.append(lower + y_slice.argmin()) - rel_idxs.append(lower + y_slice.argmax()) + rel_idxs.append(lower + np.nanargmin(y_slice)) + rel_idxs.append(lower + np.nanargmax(y_slice)) return np.unique(rel_idxs) +class NaNMinMax_py(AbstractDownsampler): + @staticmethod + def _check_valid_n_out(n_out: int): + assert n_out % 2 == 0, "n_out must be a multiple of 2" + + def _downsample( + self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs + ) -> np.ndarray: + if x is None: + # Is fine for this implementation as this is only used for testing + x = np.arange(y.shape[0]) + + xdt = x.dtype + if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): + x = x.view(np.int64) + + bins = _get_bin_idxs(x, n_out // 2) + + rel_idxs = [] + for lower, upper in zip(bins, bins[1:]): + y_slice = y[lower:upper] + if not len(y_slice): + continue + # calculate the argmin(slice) & argmax(slice) + rel_idxs.append(lower + np.argmin(y_slice)) + rel_idxs.append(lower + np.argmax(y_slice)) + return np.array(sorted(rel_idxs)) + + class M4_py(AbstractDownsampler): """Aggregation method which selects the 4 M-s, i.e y-argmin, y-argmax, x-argmin, and x-argmax per bin. 
@@ -159,6 +189,41 @@ class M4_py(AbstractDownsampler): """ + @staticmethod + def _check_valid_n_out(n_out: int): + assert n_out % 4 == 0, "n_out must be a multiple of 4" + + def _downsample( + self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs + ) -> np.ndarray: + """TODO complete docs""" + if x is None: + # Is fine for this implementation as this is only used for testing + x = np.arange(y.shape[0]) + + xdt = x.dtype + if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): + x = x.view(np.int64) + + bins = _get_bin_idxs(x, n_out // 4) + + rel_idxs = [] + for lower, upper in zip(bins, bins[1:]): + y_slice = y[lower:upper] + if not len(y_slice): + continue + + # calculate the min(idx), argmin(slice), argmax(slice), max(idx) + rel_idxs.append(lower) + rel_idxs.append(lower + np.nanargmin(y_slice)) + rel_idxs.append(lower + np.nanargmax(y_slice)) + rel_idxs.append(upper - 1) + + # NOTE: we do not use the np.unique so that all indices are retained + return np.array(sorted(rel_idxs)) + + +class NaNM4_py(AbstractDownsampler): @staticmethod def _check_valid_n_out(n_out: int): assert n_out % 4 == 0, "n_out must be a multiple of 4" From f8e35818758226e47e6885fd9cc537a603016912 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 16:51:29 +0200 Subject: [PATCH 13/26] =?UTF-8?q?=E2=9C=85=20tests:=20re-enable=20commente?= =?UTF-8?q?d=20out=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_algos_python_compliance.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/test_algos_python_compliance.py b/tests/test_algos_python_compliance.py index 9249b55..9d25744 100644 --- a/tests/test_algos_python_compliance.py +++ b/tests/test_algos_python_compliance.py @@ -5,9 +5,16 @@ LTTBDownsampler, M4Downsampler, MinMaxDownsampler, + NaNM4Downsampler, NanMinMaxDownsampler, ) -from tsdownsample._python.downsamplers import LTTB_py, M4_py, 
MinMax_py, NaNMinMax_py +from tsdownsample._python.downsamplers import ( + LTTB_py, + M4_py, + MinMax_py, + NaNM4_py, + NaNMinMax_py, +) @pytest.mark.parametrize( @@ -40,7 +47,7 @@ def test_resampler_accordance(rust_python_pair, n, n_out): "rust_python_pair", [ (NanMinMaxDownsampler(), NaNMinMax_py()), - # (NaNM4Downsampler(), NaNM4_py()) + (NaNM4Downsampler(), NaNM4_py()) ], ) @pytest.mark.parametrize("n", [10_000, 10_032, 20_321, 23_489]) @@ -54,8 +61,6 @@ def test_nan_resampler_accordance(rust_python_pair, n, n_random_nans, n_out): # Without x passed to the rust downsampler rust_result = rust_downsampler.downsample(y, n_out=n_out) python_result = python_downsampler.downsample(x, y, n_out=n_out) - print("rust", rust_result) - print("python", python_result) assert np.allclose(rust_result, python_result) # With x passed to the rust downsampler assert np.allclose( From f0af701e6d8e4c614de28bd9252dad8ca79e42bd Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Thu, 3 Aug 2023 17:00:57 +0200 Subject: [PATCH 14/26] =?UTF-8?q?=F0=9F=8E=A8=20chore:=20format=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_algos_python_compliance.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_algos_python_compliance.py b/tests/test_algos_python_compliance.py index 9d25744..ec7a6cb 100644 --- a/tests/test_algos_python_compliance.py +++ b/tests/test_algos_python_compliance.py @@ -45,10 +45,7 @@ def test_resampler_accordance(rust_python_pair, n, n_out): @pytest.mark.parametrize( "rust_python_pair", - [ - (NanMinMaxDownsampler(), NaNMinMax_py()), - (NaNM4Downsampler(), NaNM4_py()) - ], + [(NanMinMaxDownsampler(), NaNMinMax_py()), (NaNM4Downsampler(), NaNM4_py())], ) @pytest.mark.parametrize("n", [10_000, 10_032, 20_321, 23_489]) @pytest.mark.parametrize("n_random_nans", [100, 200, 500, 2000, 5000]) From 49ed8abad9e7f4d69aa40994d5e0d87b061b1e90 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: 
Fri, 4 Aug 2023 10:25:42 +0200 Subject: [PATCH 15/26] =?UTF-8?q?=F0=9F=94=A5=20chore:=20remove=20commente?= =?UTF-8?q?d=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tsdownsample/_python/downsamplers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tsdownsample/_python/downsamplers.py b/tsdownsample/_python/downsamplers.py index ffe97c9..5343f9f 100644 --- a/tsdownsample/_python/downsamplers.py +++ b/tsdownsample/_python/downsamplers.py @@ -26,7 +26,6 @@ def _get_bin_idxs(x: np.ndarray, nb_bins: int) -> np.ndarray: bins[0] = 0 bins[-1] = len(x) return np.array(bins) - # return np.unique(bins) class LTTB_py(AbstractDownsampler): From b6fa37205864826b876586012575529640012f12 Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Fri, 4 Aug 2023 10:26:06 +0200 Subject: [PATCH 16/26] =?UTF-8?q?=F0=9F=93=9D=20docs:=20update=20README.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 54 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index bcf025e..654b8c2 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,14 @@ [![CodeQL](https://github.com/predict-idlab/tsdownsample/actions/workflows/codeql.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/codeql.yml) [![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml) [![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml) + Extremely fast **time series downsampling 📈** for visualization, written in Rust. 
## Features ✨ -* **Fast**: written in rust with PyO3 bindings +- **Fast**: written in rust with PyO3 bindings - leverages optimized [argminmax](https://github.com/jvdd/argminmax) - which is SIMD accelerated with runtime feature detection - scales linearly with the number of data points @@ -25,13 +26,13 @@ Extremely fast **time series downsampling 📈** for visualization, written in R In Rust - which is a compiled language - there is no GIL, so CPU-bound tasks can be parallelized (with Rayon) with little to no overhead. -* **Efficient**: memory efficient +- **Efficient**: memory efficient - works on views of the data (no copies) - no intermediate data structures are created -* **Flexible**: works on any type of data - - supported datatypes are - - for `x`: `f32`, `f64`, `i16`, `i32`, `i64`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64` - - for `y`: `f16`, `f32`, `f64`, `i8`, `i16`, `i32`, `i64`, `u8`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64`, `bool` +- **Flexible**: works on any type of data + - supported datatypes are + - for `x`: `f32`, `f64`, `i16`, `i32`, `i64`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64` + - for `y`: `f16`, `f32`, `f64`, `i8`, `i16`, `i32`, `i64`, `u8`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64`, `bool`\*
!! 🚀 f16 argminmax is 200-300x faster than numpy In contrast with all other data types above, f16 is *not* hardware supported (i.e., no instructions for f16) by most modern CPUs!!
@@ -39,7 +40,9 @@ Extremely fast **time series downsampling 📈** for visualization, written in R 💡 As for argminmax, only comparisons are needed - and thus no arithmetic operations - creating a symmetrical ordinal mapping from f16 to i16 is sufficient. This mapping allows the use of the hardware-supported scalar and SIMD i16 instructions - while not producing any memory overhead 🎉
More details are described in argminmax PR #1.
-* **Easy to use**: simple & flexible API +- **Easy to use**: simple & flexible API + +\*When `NaN`s need to be retained in the downsampling, the only supported datatypes for `y` are `f16`, `f32` and `f64` ## Install @@ -83,35 +86,50 @@ downsample([x], y, n_out, **kwargs) -> ndarray[uint64] ``` **Arguments**: + - `x` is optional - `x` and `y` are both positional arguments -- `n_out` is a mandatory keyword argument that defines the number of output values* -- `**kwargs` are optional keyword arguments *(see [table below](#downsampling-algorithms-📈))*: +- `n_out` is a mandatory keyword argument that defines the number of output values\* +- `**kwargs` are optional keyword arguments _(see [table below](#downsampling-algorithms-📈))_: - `n_threads`: how many threads to use for multi-threading (default `1`, so no multi-threading) - ... **Returns**: a `ndarray[uint64]` of indices that can be used to index the original data. -*When there are gaps in the time series, fewer than `n_out` indices may be returned. +\*When there are gaps in the time series, fewer than `n_out` indices may be returned. 
+ ### Downsampling algorithms 📈 The following downsampling algorithms (classes) are implemented: -| Downsampler | Description | `**kwargs` | -| ---:| --- |--- | -| `MinMaxDownsampler` | selects the **min and max** value in each bin | `n_threads` | -| `M4Downsampler` | selects the [**min, max, first and last**](https://dl.acm.org/doi/pdf/10.14778/2732951.2732953) value in each bin | `n_threads` | -| `LTTBDownsampler` | performs the [**Largest Triangle Three Buckets**](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm | `n_threads` | -| `MinMaxLTTBDownsampler` | (*new two-step algorithm 🎉*) first selects `n_out` * `minmax_ratio` **min and max** values, then further reduces these to `n_out` values using the **Largest Triangle Three Buckets** algorithm | `n_threads`, `minmax_ratio`* | +| Downsampler | Description | `**kwargs` | +| ----------------------: | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------- | +| `MinMaxDownsampler` | selects the **min and max** value in each bin | `n_threads` | +| `M4Downsampler` | selects the [**min, max, first and last**](https://dl.acm.org/doi/pdf/10.14778/2732951.2732953) value in each bin | `n_threads` | +| `LTTBDownsampler` | performs the [**Largest Triangle Three Buckets**](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm | `n_threads` | +| `MinMaxLTTBDownsampler` | (_new two-step algorithm 🎉_) first selects `n_out` \* `minmax_ratio` **min and max** values, then further reduces these to `n_out` values using the **Largest Triangle Three Buckets** algorithm | `n_threads`, `minmax_ratio`\* | -*Default value for `minmax_ratio` is 30, which is empirically proven to be a good default. (More details in our upcomming paper) +\*Default value for `minmax_ratio` is 30, which is empirically proven to be a good default. 
(More details in our upcoming paper) +### Handling NaNs + +This library supports two `NaN`-policies: + +1. Omit `NaN`s (`NaN`s are ignored during downsampling). +2. Return `NaN` once there is one present in the bin of the considered data. + +| Omit `NaN`s | Return `NaN`s | +| ----------------------: | :------------------------- | +| `MinMaxDownsampler` | `NaNMinMaxDownsampler` | +| `M4Downsampler` | `NaNM4Downsampler` | +| `MinMaxLTTBDownsampler` | `NaNMinMaxLTTBDownsampler` | +| `LTTBDownsampler` | | ## Limitations & assumptions 🚨 Assumes; + 1. `x`-data is (non-strictly) monotonic increasing (i.e., sorted) -2. no `NaNs` in the data --- From df8db7f0b067ba989e8c44c624e2dd7fb77a575d Mon Sep 17 00:00:00 2001 From: Niels Praet Date: Mon, 7 Aug 2023 10:43:47 +0200 Subject: [PATCH 17/26] =?UTF-8?q?=F0=9F=93=9D=20docs:=20update=20NaN=20des?= =?UTF-8?q?criptions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 654b8c2..e5e3b15 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ The following downsampling algorithms (classes) are implemented: This library supports two `NaN`-policies: 1. Omit `NaN`s (`NaN`s are ignored during downsampling). -2. Return `NaN` once there is one present in the bin of the considered data. +2. Return index of first `NaN` once there is at least one present in the bin of the considered data. 
| Omit `NaN`s | Return `NaN`s | | ----------------------: | :------------------------- | From 3c6d9d958612fb4a83e340879faf3d5df8eaf359 Mon Sep 17 00:00:00 2001 From: jvdd Date: Tue, 23 Jan 2024 18:00:03 +0100 Subject: [PATCH 18/26] :broom: remove threaded --- downsample_rs/src/m4.rs | 49 ++++++++-------- downsample_rs/src/minmax.rs | 49 ++++++++-------- downsample_rs/src/minmaxlttb.rs | 99 +++++++++++++++++++-------------- src/lib.rs | 6 +- 4 files changed, 113 insertions(+), 90 deletions(-) diff --git a/downsample_rs/src/m4.rs b/downsample_rs/src/m4.rs index 71d5689..75ce224 100644 --- a/downsample_rs/src/m4.rs +++ b/downsample_rs/src/m4.rs @@ -14,7 +14,7 @@ use super::POOL; // ----------- WITH X macro_rules! m4_with_x { - ($func_name:ident, $trait:path, $func:expr) => { + ($func_name:ident, $trait:path, $f_argminmax:expr) => { pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec where for<'a> &'a [Ty]: $trait, @@ -23,7 +23,7 @@ macro_rules! m4_with_x { { assert_eq!(n_out % 4, 0); let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4); - m4_generic_with_x(arr, bin_idx_iterator, n_out, $func) + m4_generic_with_x(arr, bin_idx_iterator, n_out, $f_argminmax) } }; } @@ -34,13 +34,13 @@ m4_with_x!(m4_with_x_nan, NaNArgMinMax, |arr| arr.nanargminmax()); // ----------- WITHOUT X macro_rules! 
m4_without_x { - ($func_name:ident, $trait:path, $func:expr) => { + ($func_name:ident, $trait:path, $f_argminmax:expr) => { pub fn $func_name(arr: &[T], n_out: usize) -> Vec where for<'a> &'a [T]: $trait, { assert_eq!(n_out % 4, 0); - m4_generic(arr, n_out, $func) + m4_generic(arr, n_out, $f_argminmax) } }; } @@ -52,15 +52,19 @@ m4_without_x!(m4_without_x_nan, NaNArgMinMax, |arr| arr.nanargminmax()); // ----------- WITH X -pub fn m4_with_x_parallel(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, - Ty: Copy + PartialOrd + Send + Sync, -{ - assert_eq!(n_out % 4, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4); - m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, |arr| arr.argminmax()) +macro_rules! m4_with_x_parallel { + ($func_name:ident, $trait:path, $f_argminmax:expr) => { + pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec + where + for<'a> &'a [Ty]: $trait, + Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, + Ty: Copy + PartialOrd + Send + Sync, + { + assert_eq!(n_out % 4, 0); + let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4); + m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, $f_argminmax) + } + }; } m4_with_x_parallel!(m4_with_x_parallel, ArgMinMax, |arr| arr.argminmax()); @@ -69,15 +73,16 @@ m4_with_x_parallel!(m4_with_x_parallel_nan, NaNArgMinMax, |arr| arr // ----------- WITHOUT X -pub fn m4_without_x_parallel( - arr: &[T], - n_out: usize, -) -> Vec -where - for<'a> &'a [T]: ArgMinMax, -{ - assert_eq!(n_out % 4, 0); - m4_generic_parallel(arr, n_out, |arr| arr.argminmax()) +macro_rules! 
m4_without_x_parallel { + ($func_name:ident, $trait:path, $f_argminmax:expr) => { + pub fn $func_name(arr: &[T], n_out: usize) -> Vec + where + for<'a> &'a [T]: $trait, + { + assert_eq!(n_out % 4, 0); + m4_generic_parallel(arr, n_out, $f_argminmax) + } + }; } m4_without_x_parallel!(m4_without_x_parallel, ArgMinMax, |arr| arr.argminmax()); diff --git a/downsample_rs/src/minmax.rs b/downsample_rs/src/minmax.rs index dc50849..12553d2 100644 --- a/downsample_rs/src/minmax.rs +++ b/downsample_rs/src/minmax.rs @@ -15,7 +15,7 @@ use super::POOL; // ----------- WITH X macro_rules! min_max_with_x { - ($func_name:ident, $trait:path, $func:expr) => { + ($func_name:ident, $trait:path, $f_argminmax:expr) => { pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec where for<'a> &'a [Ty]: $trait, @@ -24,7 +24,7 @@ macro_rules! min_max_with_x { { assert_eq!(n_out % 2, 0); let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2); - min_max_generic_with_x(arr, bin_idx_iterator, n_out, $func) + min_max_generic_with_x(arr, bin_idx_iterator, n_out, $f_argminmax) } }; } @@ -35,13 +35,13 @@ min_max_with_x!(min_max_with_x_nan, NaNArgMinMax, |arr| arr.nanargminmax()); // ----------- WITHOUT X macro_rules! 
min_max_without_x { - ($func_name:ident, $trait:path, $func:expr) => { + ($func_name:ident, $trait:path, $f_argminmax:expr) => { pub fn $func_name(arr: &[T], n_out: usize) -> Vec where for<'a> &'a [T]: $trait, { assert_eq!(n_out % 2, 0); - min_max_generic(arr, n_out, $func) + min_max_generic(arr, n_out, $f_argminmax) } }; } @@ -54,15 +54,19 @@ min_max_without_x!(min_max_without_x_nan, NaNArgMinMax, |arr| arr // ----------- WITH X -pub fn min_max_with_x_parallel(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, - Ty: Copy + PartialOrd + Send + Sync, -{ - assert_eq!(n_out % 2, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2); - min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, |arr| arr.argminmax()) +macro_rules! min_max_with_x_parallel { + ($func_name:ident, $trait:path, $f_argminmax:expr) => { + pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec + where + for<'a> &'a [Ty]: $trait, + Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, + Ty: Copy + PartialOrd + Send + Sync, + { + assert_eq!(n_out % 2, 0); + let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2); + min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, $f_argminmax) + } + }; } min_max_with_x_parallel!(min_max_with_x_parallel, ArgMinMax, |arr| arr.argminmax()); @@ -71,15 +75,16 @@ min_max_with_x_parallel!(min_max_with_x_parallel_nan, NaNArgMinMax, |arr| arr // ----------- WITHOUT X -pub fn min_max_without_x_parallel( - arr: &[T], - n_out: usize, -) -> Vec -where - for<'a> &'a [T]: ArgMinMax, -{ - assert_eq!(n_out % 2, 0); - min_max_generic_parallel(arr, n_out, |arr| arr.argminmax()) +macro_rules! 
min_max_without_x_parallel { + ($func_name:ident, $trait:path, $f_argminmax:expr) => { + pub fn $func_name(arr: &[T], n_out: usize) -> Vec + where + for<'a> &'a [T]: $trait, + { + assert_eq!(n_out % 2, 0); + min_max_generic_parallel(arr, n_out, $f_argminmax) + } + }; } min_max_without_x_parallel!(min_max_without_x_parallel, ArgMinMax, |arr| arr.argminmax()); diff --git a/downsample_rs/src/minmaxlttb.rs b/downsample_rs/src/minmaxlttb.rs index 1654fa4..959694b 100644 --- a/downsample_rs/src/minmaxlttb.rs +++ b/downsample_rs/src/minmaxlttb.rs @@ -10,16 +10,22 @@ use num_traits::{AsPrimitive, FromPrimitive}; // ----------- WITH X -pub fn minmaxlttb_with_x + FromPrimitive, Ty: Num + AsPrimitive>( - x: &[Tx], - y: &[Ty], - n_out: usize, - minmax_ratio: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, -{ - minmaxlttb_generic(x, y, n_out, minmax_ratio, minmax::min_max_with_x) +macro_rules! minmaxlttb_with_x { + ($func_name:ident, $trait:ident, $f_minmax:expr) => { + pub fn $func_name( + x: &[Tx], + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + Tx: Num + AsPrimitive + FromPrimitive, + Ty: Num + AsPrimitive, + { + minmaxlttb_generic(x, y, n_out, minmax_ratio, $f_minmax) + } + }; } minmaxlttb_with_x!(minmaxlttb_with_x, ArgMinMax, minmax::min_max_with_x); @@ -31,15 +37,19 @@ minmaxlttb_with_x!( // ----------- WITHOUT X -pub fn minmaxlttb_without_x>( - y: &[Ty], - n_out: usize, - minmax_ratio: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, -{ - minmaxlttb_generic_without_x(y, n_out, minmax_ratio, minmax::min_max_without_x) +macro_rules! 
minmaxlttb_without_x { + ($func_name:ident, $trait:ident, $f_minmax:expr) => { + pub fn $func_name>( + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + { + minmaxlttb_generic_without_x(y, n_out, minmax_ratio, $f_minmax) + } + }; } minmaxlttb_without_x!(minmaxlttb_without_x, ArgMinMax, minmax::min_max_without_x); @@ -53,19 +63,22 @@ minmaxlttb_without_x!( // ----------- WITH X -pub fn minmaxlttb_with_x_parallel< - Tx: Num + AsPrimitive + FromPrimitive + Send + Sync, - Ty: Num + AsPrimitive + Send + Sync, ->( - x: &[Tx], - y: &[Ty], - n_out: usize, - minmax_ratio: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, -{ - minmaxlttb_generic(x, y, n_out, minmax_ratio, minmax::min_max_with_x_parallel) +macro_rules! minmaxlttb_with_x_parallel { + ($func_name:ident, $trait:ident, $f_minmax:expr) => { + pub fn $func_name( + x: &[Tx], + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + Tx: Num + AsPrimitive + FromPrimitive + Send + Sync, + Ty: Num + AsPrimitive + Send + Sync, + { + minmaxlttb_generic(x, y, n_out, minmax_ratio, $f_minmax) + } + }; } minmaxlttb_with_x_parallel!( @@ -81,15 +94,19 @@ minmaxlttb_with_x_parallel!( // ----------- WITHOUT X -pub fn minmaxlttb_without_x_parallel + Send + Sync>( - y: &[Ty], - n_out: usize, - minmax_ratio: usize, -) -> Vec -where - for<'a> &'a [Ty]: ArgMinMax, -{ - minmaxlttb_generic_without_x(y, n_out, minmax_ratio, minmax::min_max_without_x_parallel) +macro_rules! minmaxlttb_without_x_parallel { + ($func_name:ident, $trait:ident, $f_minmax:expr) => { + pub fn $func_name + Send + Sync>( + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + ) -> Vec + where + for<'a> &'a [Ty]: $trait, + { + minmaxlttb_generic_without_x(y, n_out, minmax_ratio, $f_minmax) + } + }; } minmaxlttb_without_x_parallel!( diff --git a/src/lib.rs b/src/lib.rs index 19967c0..213578f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -185,7 +185,6 @@ macro_rules! 
_create_pyfuncs_without_x_helper { } macro_rules! create_pyfuncs_without_x { - // Use @threaded to differentiate between the single and multithreaded versions ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_without_x_helper!( _create_pyfunc_without_x, @@ -205,7 +204,6 @@ macro_rules! create_pyfuncs_without_x { } macro_rules! create_pyfuncs_without_x_with_ratio { - // Use @threaded to differentiate between the single and multithreaded versions ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_without_x_helper!( _create_pyfunc_without_x_with_ratio, @@ -234,7 +232,6 @@ macro_rules! _create_pyfuncs_with_x_helper { } macro_rules! create_pyfuncs_with_x { - // Use @threaded to differentiate between the single and multithreaded versions ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_with_x_helper!(_create_pyfunc_with_x, $resample_mod, $resample_fn, $mod); }; @@ -244,7 +241,6 @@ macro_rules! create_pyfuncs_with_x { } macro_rules! 
create_pyfuncs_with_x_with_ratio { - // Use @threaded to differentiate between the single and multithreaded versions ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { _create_pyfuncs_with_x_helper!( _create_pyfunc_with_x_with_ratio, @@ -417,7 +413,7 @@ fn minmaxlttb(_py: Python, m: &PyModule) -> PyResult<()> { minmaxlttb_without_x_parallel, parallel_mod ); - create_pyfuncs_without_x_with_ratio!(@nan @threaded + create_pyfuncs_without_x_with_ratio!(@nan minmaxlttb_mod, minmaxlttb_without_x_parallel, parallel_mod From 6916dbbfd9b3a6a01a7e4adb276fa6ebca78df9a Mon Sep 17 00:00:00 2001 From: jvdd Date: Wed, 24 Jan 2024 08:57:56 +0100 Subject: [PATCH 19/26] :tada: cleanup code --- tests/test_algos_python_compliance.py | 3 + tests/test_tsdownsample.py | 30 ++++- tsdownsample/downsamplers.py | 47 ++++++- tsdownsample/downsampling_interface.py | 175 +++++++------------------ 4 files changed, 125 insertions(+), 130 deletions(-) diff --git a/tests/test_algos_python_compliance.py b/tests/test_algos_python_compliance.py index ec7a6cb..3a92e54 100644 --- a/tests/test_algos_python_compliance.py +++ b/tests/test_algos_python_compliance.py @@ -23,6 +23,9 @@ (MinMaxDownsampler(), MinMax_py()), (M4Downsampler(), M4_py()), (LTTBDownsampler(), LTTB_py()), + # Include NaN downsamplers + (NanMinMaxDownsampler(), NaNMinMax_py()), + (NaNM4Downsampler(), NaNM4_py()), ], ) @pytest.mark.parametrize("n", [10_000, 10_032, 20_321, 23_489]) diff --git a/tests/test_tsdownsample.py b/tests/test_tsdownsample.py index d228482..3c97516 100644 --- a/tests/test_tsdownsample.py +++ b/tests/test_tsdownsample.py @@ -344,7 +344,7 @@ def test_error_invalid_args(): @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) def test_non_contiguous_array(downsampler: AbstractDownsampler): """Test non contiguous array.""" - arr = np.random.randint(0, 100, size=10_000) + arr = np.random.randint(0, 100, size=10_000).astype(np.float32) arr = arr[::2] assert not arr.flags["C_CONTIGUOUS"] with 
pytest.raises(ValueError) as e_msg: @@ -361,3 +361,31 @@ def test_everynth_non_contiguous_array(): s_downsampled = downsampler.downsample(arr, n_out=100) assert s_downsampled[0] == 0 assert s_downsampled[-1] == 4950 + + +def test_nan_minmax_downsampler(): + """Test NaN downsamplers.""" + arr = np.random.randn(50_000) + arr[::5] = np.nan + s_downsampled = NanMinMaxDownsampler().downsample(arr, n_out=100) + arr_downsampled = arr[s_downsampled] + assert np.all(np.isnan(arr_downsampled)) + + +def test_nan_m4_downsampler(): + """Test NaN downsamplers.""" + arr = np.random.randn(50_000) + arr[::5] = np.nan + s_downsampled = NaNM4Downsampler().downsample(arr, n_out=100) + arr_downsampled = arr[s_downsampled] + assert np.all(np.isnan(arr_downsampled[1::4])) # min is NaN + assert np.all(np.isnan(arr_downsampled[2::4])) # max is NaN + + +def test_nan_minmaxlttb_downsampler(): + """Test NaN downsamplers.""" + arr = np.random.randn(50_000) + arr[::5] = np.nan + s_downsampled = NaNMinMaxLTTBDownsampler().downsample(arr, n_out=100) + arr_downsampled = arr[s_downsampled] + assert np.all(np.isnan(arr_downsampled[1:-1])) # first and last are not NaN diff --git a/tsdownsample/downsamplers.py b/tsdownsample/downsamplers.py index 1132337..32796f0 100644 --- a/tsdownsample/downsamplers.py +++ b/tsdownsample/downsamplers.py @@ -14,6 +14,12 @@ class MinMaxDownsampler(AbstractRustDownsampler): + """Downsampler that uses the MinMax algorithm. If the y data contains NaNs, these are + ignored (i.e. the NaNs are not taken into account when selecting data points). + + For each bin, the indices of the minimum and maximum values are selected. + """ + @property def rust_mod(self): return _tsdownsample_rs.minmax @@ -26,6 +32,12 @@ def _check_valid_n_out(n_out: int): class NanMinMaxDownsampler(AbstractRustNaNDownsampler): + """Downsampler that uses the MinMax algorithm. If the y data contains NaNs, the + indices of these NaNs are returned. 
+ + For each bin, the indices of the minimum and maximum values are selected. + """ + @property def rust_mod(self): return _tsdownsample_rs.minmax @@ -38,6 +50,13 @@ def _check_valid_n_out(n_out: int): class M4Downsampler(AbstractRustDownsampler): + """Downsampler that uses the M4 algorithm. If the y data contains NaNs, these are + ignored (i.e. the NaNs are not taken into account when selecting data points). + + For each bin, the indices of the first, last, minimum and maximum values are + selected. + """ + @property def rust_mod(self): return _tsdownsample_rs.m4 @@ -50,6 +69,13 @@ def _check_valid_n_out(n_out: int): class NaNM4Downsampler(AbstractRustNaNDownsampler): + """Downsampler that uses the M4 algorithm. If the y data contains NaNs, the indices + of these NaNs are returned. + + For each bin, the indices of the first, last, minimum and maximum values are + selected. + """ + @property def rust_mod(self): return _tsdownsample_rs.m4 @@ -62,12 +88,21 @@ def _check_valid_n_out(n_out: int): class LTTBDownsampler(AbstractRustDownsampler): + """Downsampler that uses the LTTB algorithm.""" + @property def rust_mod(self): return _tsdownsample_rs.lttb class MinMaxLTTBDownsampler(AbstractRustDownsampler): + """Downsampler that uses the MinMaxLTTB algorithm. If the y data contains NaNs, + these are ignored (i.e. the NaNs are not taken into account when selecting data + points). + + MinMaxLTTB paper: https://arxiv.org/abs/2305.00332 + """ + @property def rust_mod(self): return _tsdownsample_rs.minmaxlttb @@ -82,16 +117,22 @@ def downsample( class NaNMinMaxLTTBDownsampler(AbstractRustNaNDownsampler): + """Downsampler that uses the MinMaxLTTB algorithm. If the y data contains NaNs, the + indices of these NaNs are returned. 
+ + MinMaxLTTB paper: https://arxiv.org/abs/2305.00332 + """ + @property def rust_mod(self): return _tsdownsample_rs.minmaxlttb def downsample( - self, *args, n_out: int, minmax_ratio: int = 30, n_threads: int = 1, **_ + self, *args, n_out: int, minmax_ratio: int = 4, parallel: bool = False, **_ ): assert minmax_ratio > 0, "minmax_ratio must be greater than 0" return super().downsample( - *args, n_out=n_out, n_threads=n_threads, ratio=minmax_ratio + *args, n_out=n_out, parallel=parallel, ratio=minmax_ratio ) @@ -99,6 +140,8 @@ def downsample( class EveryNthDownsampler(AbstractDownsampler): + """Downsampler that selects every nth data point""" + def __init__(self, **kwargs): super().__init__(check_contiguous=False, **kwargs) diff --git a/tsdownsample/downsampling_interface.py b/tsdownsample/downsampling_interface.py index 0a82d6c..81a9bc5 100644 --- a/tsdownsample/downsampling_interface.py +++ b/tsdownsample/downsampling_interface.py @@ -200,9 +200,13 @@ def mod_multi_core(self) -> Union[ModuleType, None]: def _switch_mod_with_y( y_dtype: np.dtype, mod: ModuleType, downsample_func: str = DOWNSAMPLE_F ) -> Callable: - """The x-data is not considered in the downsampling + """Select the appropriate function from the rust module for the y-data. - Assumes equal binning. + Assumes equal binning (when no data for x is passed -> only this function is + executed). + Equidistant binning is utilized when a `downsample_func` is passed from the + `_switch_mod_with_x_and_y` method (since the x-data is considered in the + downsampling). Parameters ---------- @@ -212,6 +216,8 @@ def _switch_mod_with_y( The module to select the appropriate function from downsample_func : str, optional The name of the function to use, by default DOWNSAMPLE_FUNC. + This argument is passed from the `_switch_mod_with_x_and_y` method when + the x-data is considered in the downsampling. 
""" # FLOATS if np.issubdtype(y_dtype, np.floating): @@ -248,7 +254,10 @@ def _switch_mod_with_y( @staticmethod def _switch_mod_with_x_and_y( - x_dtype: np.dtype, y_dtype: np.dtype, mod: ModuleType + x_dtype: np.dtype, + y_dtype: np.dtype, + mod: ModuleType, + downsample_func: str = DOWNSAMPLE_F, ) -> Callable: """The x-data is considered in the downsampling @@ -262,48 +271,50 @@ def _switch_mod_with_x_and_y( The dtype of the y-data mod : ModuleType The module to select the appropriate function from + downsample_func : str, optional + The name of the function to use, by default DOWNSAMPLE_FUNC. """ # FLOATS if np.issubdtype(x_dtype, np.floating): if x_dtype == np.float16: return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{DOWNSAMPLE_F}_f16" + y_dtype, mod, f"{downsample_func}_f16" ) elif x_dtype == np.float32: return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{DOWNSAMPLE_F}_f32" + y_dtype, mod, f"{downsample_func}_f32" ) elif x_dtype == np.float64: return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{DOWNSAMPLE_F}_f64" + y_dtype, mod, f"{downsample_func}_f64" ) # UINTS elif np.issubdtype(x_dtype, np.unsignedinteger): if x_dtype == np.uint16: return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{DOWNSAMPLE_F}_u16" + y_dtype, mod, f"{downsample_func}_u16" ) elif x_dtype == np.uint32: return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{DOWNSAMPLE_F}_u32" + y_dtype, mod, f"{downsample_func}_u32" ) elif x_dtype == np.uint64: return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{DOWNSAMPLE_F}_u64" + y_dtype, mod, f"{downsample_func}_u64" ) # INTS (need to be last because uint is subdtype of int) elif np.issubdtype(x_dtype, np.integer): if x_dtype == np.int16: return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{DOWNSAMPLE_F}_i16" + y_dtype, mod, f"{downsample_func}_i16" ) elif x_dtype == np.int32: return AbstractRustDownsampler._switch_mod_with_y( - 
y_dtype, mod, f"{DOWNSAMPLE_F}_i32" + y_dtype, mod, f"{downsample_func}_i32" ) elif x_dtype == np.int64: return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{DOWNSAMPLE_F}_i64" + y_dtype, mod, f"{downsample_func}_i64" ) # DATETIME -> i64 (datetime64 is viewed as int64) # TIMEDELTA -> i64 (timedelta64 is viewed as int64) @@ -349,7 +360,9 @@ def _downsample( # timedelta64 is viewed as int64 x = x.view(dtype=np.int64) ## Getting the appropriate downsample function - downsample_f = self._switch_mod_with_x_and_y(x.dtype, y.dtype, mod) + downsample_f = self._switch_mod_with_x_and_y( + x.dtype, y.dtype, mod, DOWNSAMPLE_F + ) return downsample_f(x, y, n_out, **kwargs) def downsample(self, *args, n_out: int, parallel: bool = False, **kwargs): @@ -383,19 +396,17 @@ def __deepcopy__(self, memo): class AbstractRustNaNDownsampler(AbstractRustDownsampler, ABC): """RustNaNDownsampler interface-class, subclassed by concrete downsamplers.""" - def __init__(self): - super().__init__() # same for x and y - - # overwrite supported y dtypes, as only floats are supported for nan-handling - self.y_dtype_regex_list = _nan_y_rust_dtypes - @staticmethod def _switch_mod_with_y( y_dtype: np.dtype, mod: ModuleType, downsample_func: str = NAN_DOWNSAMPLE_F ) -> Callable: - """The x-data is not considered in the downsampling + """Select the appropriate function from the rust module for the y-data. - Assumes equal binning. + Assumes equal binning (when no data for x is passed -> only this function is + executed). + Equidistant binning is utilized when a `downsample_func` is passed from the + `_switch_mod_with_x_and_y` method (since the x-data is considered in the + downsampling). Parameters ---------- @@ -404,93 +415,27 @@ def _switch_mod_with_y( mod : ModuleType The module to select the appropriate function from downsample_func : str, optional - The name of the function to use, by default DOWNSAMPLE_FUNC. + The name of the function to use, by default NAN_DOWNSAMPLE_F. 
+ This argument is passed from the `_switch_mod_with_x_and_y` method when + the x-data is considered in the downsampling. """ - # FLOATS - if np.issubdtype(y_dtype, np.floating): - if y_dtype == np.float16: - return getattr(mod, downsample_func + "_f16") - elif y_dtype == np.float32: - return getattr(mod, downsample_func + "_f32") - elif y_dtype == np.float64: - return getattr(mod, downsample_func + "_f64") - raise ValueError(f"Unsupported data type (for y): {y_dtype}") - - @staticmethod - def _switch_mod_with_x_and_y( - x_dtype: np.dtype, y_dtype: np.dtype, mod: ModuleType - ) -> Callable: - """The x-data is considered in the downsampling - - Assumes equal binning. - - Parameters - ---------- - x_dtype : np.dtype - The dtype of the x-data - y_dtype : np.dtype - The dtype of the y-data - mod : ModuleType - The module to select the appropriate function from - """ - # FLOATS - if np.issubdtype(x_dtype, np.floating): - if x_dtype == np.float16: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_f16" - ) - elif x_dtype == np.float32: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_f32" - ) - elif x_dtype == np.float64: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_f64" - ) - # UINTS - elif np.issubdtype(x_dtype, np.unsignedinteger): - if x_dtype == np.uint16: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_u16" - ) - elif x_dtype == np.uint32: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_u32" - ) - elif x_dtype == np.uint64: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_u64" - ) - # INTS (need to be last because uint is subdtype of int) - elif np.issubdtype(x_dtype, np.integer): - if x_dtype == np.int16: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_i16" - ) - elif x_dtype 
== np.int32: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_i32" - ) - elif x_dtype == np.int64: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{NAN_DOWNSAMPLE_F}_i64" - ) - # DATETIME -> i64 (datetime64 is viewed as int64) - # TIMEDELTA -> i64 (timedelta64 is viewed as int64) - raise ValueError(f"Unsupported data type (for x): {x_dtype}") + if not np.issubdtype(y_dtype, np.floating): + # When y is not a float, we need to remove the _nan suffix to use the + # regular downsample function as the _nan suffix is only used for floats. + # (Note that NaNs only exist for floats) + downsample_func = downsample_func.replace("_nan", "") + return AbstractRustDownsampler._switch_mod_with_y(y_dtype, mod, downsample_func) def _downsample( self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, - n_threads: int = 1, + parallel: bool = False, **kwargs, ) -> np.ndarray: """Downsample the data in x and y.""" mod = self.mod_single_core - is_multi_core = False - parallel = n_threads > 1 if parallel: if self.mod_multi_core is None: name = self.__class__.__name__ @@ -500,14 +445,10 @@ def _downsample( ) else: mod = self.mod_multi_core - is_multi_core = True ## Viewing the x-data as different dtype (if necessary) if x is None: - downsample_f = self._switch_mod_with_y(y.dtype, mod) - if is_multi_core: - return downsample_f(y, n_out, n_threads=n_threads, **kwargs) - else: - return downsample_f(y, n_out, **kwargs) + downsample_f = self._switch_mod_with_y(y.dtype, mod, NAN_DOWNSAMPLE_F) + return downsample_f(y, n_out, **kwargs) elif np.issubdtype(x.dtype, np.datetime64): # datetime64 is viewed as int64 x = x.view(dtype=np.int64) @@ -515,27 +456,7 @@ def _downsample( # timedelta64 is viewed as int64 x = x.view(dtype=np.int64) ## Getting the appropriate downsample function - downsample_f = self._switch_mod_with_x_and_y(x.dtype, y.dtype, mod) - if is_multi_core: - return downsample_f(x, y, n_out, n_threads=n_threads, 
**kwargs) - else: - return downsample_f(x, y, n_out, **kwargs) - - def downsample( - self, *args, n_out: int, n_threads: int = 1, **kwargs # x and y are optional - ): - """Downsample the data in x and y.""" - return super().downsample(*args, n_out=n_out, n_threads=n_threads, **kwargs) - - def __deepcopy__(self, memo): - """Deepcopy the object.""" - cls = self.__class__ - result = cls.__new__(cls) - memo[id(self)] = result - for k, v in self.__dict__.items(): - if k.endswith("_mod") or k.startswith("mod_"): - # Don't (deep)copy the compiled modules - setattr(result, k, v) - else: - setattr(result, k, deepcopy(v, memo)) - return result + downsample_f = self._switch_mod_with_x_and_y( + x.dtype, y.dtype, mod, NAN_DOWNSAMPLE_F + ) + return downsample_f(x, y, n_out, **kwargs) From 910c788221a6a5932f28ac0a0127f9be6aa93a73 Mon Sep 17 00:00:00 2001 From: jvdd Date: Wed, 24 Jan 2024 09:48:07 +0100 Subject: [PATCH 20/26] :see_no_evil: fix typo in NaNMinMaxDownsampler --- README.md | 7 ++++--- tests/test_algos_python_compliance.py | 6 +++--- tests/test_tsdownsample.py | 6 +++--- tsdownsample/__init__.py | 4 ++-- tsdownsample/downsamplers.py | 2 +- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e3e55f2..1766eac 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Extremely fast **time series downsampling 📈** for visualization, written in R - **Flexible**: works on any type of data - supported datatypes are - for `x`: `f32`, `f64`, `i16`, `i32`, `i64`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64` - - for `y`: `f16`, `f32`, `f64`, `i8`, `i16`, `i32`, `i64`, `u8`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64`, `bool`\* + - for `y`: `f16`, `f32`, `f64`, `i8`, `i16`, `i32`, `i64`, `u8`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64`, `bool`
!! 🚀 f16 argminmax is 200-300x faster than numpy. In contrast with all other data types above, f16 is *not* hardware supported (i.e., no instructions for f16) by most modern CPUs!!
@@ -42,8 +42,6 @@ Extremely fast **time series downsampling 📈** for visualization, written in R
- **Easy to use**: simple & flexible API -\*When `NaN`s need to be retained in the downsampling, the only supported datatypes for `y` are `f16`, `f32` and `f64` - ## Install ```bash @@ -126,11 +124,14 @@ This library supports two `NaN`-policies: | `MinMaxLTTBDownsampler` | `NaNMinMaxLTTBDownsampler` | | `LTTBDownsampler` | | +> Note that NaNs are not supported for `x`-data. + ## Limitations & assumptions 🚨 Assumes; 1. `x`-data is (non-strictly) monotonic increasing (i.e., sorted) +2. no `NaN`s in `x`-data --- diff --git a/tests/test_algos_python_compliance.py b/tests/test_algos_python_compliance.py index 3a92e54..8a64163 100644 --- a/tests/test_algos_python_compliance.py +++ b/tests/test_algos_python_compliance.py @@ -6,7 +6,7 @@ M4Downsampler, MinMaxDownsampler, NaNM4Downsampler, - NanMinMaxDownsampler, + NaNMinMaxDownsampler, ) from tsdownsample._python.downsamplers import ( LTTB_py, @@ -24,7 +24,7 @@ (M4Downsampler(), M4_py()), (LTTBDownsampler(), LTTB_py()), # Include NaN downsamplers - (NanMinMaxDownsampler(), NaNMinMax_py()), + (NaNMinMaxDownsampler(), NaNMinMax_py()), (NaNM4Downsampler(), NaNM4_py()), ], ) @@ -48,7 +48,7 @@ def test_resampler_accordance(rust_python_pair, n, n_out): @pytest.mark.parametrize( "rust_python_pair", - [(NanMinMaxDownsampler(), NaNMinMax_py()), (NaNM4Downsampler(), NaNM4_py())], + [(NaNMinMaxDownsampler(), NaNMinMax_py()), (NaNM4Downsampler(), NaNM4_py())], ) @pytest.mark.parametrize("n", [10_000, 10_032, 20_321, 23_489]) @pytest.mark.parametrize("n_random_nans", [100, 200, 500, 2000, 5000]) diff --git a/tests/test_tsdownsample.py b/tests/test_tsdownsample.py index 3c97516..fb7c4af 100644 --- a/tests/test_tsdownsample.py +++ b/tests/test_tsdownsample.py @@ -11,7 +11,7 @@ MinMaxDownsampler, MinMaxLTTBDownsampler, NaNM4Downsampler, - NanMinMaxDownsampler, + NaNMinMaxDownsampler, NaNMinMaxLTTBDownsampler, ) from tsdownsample.downsampling_interface import ( @@ -31,7 +31,7 @@ ] RUST_NAN_DOWNSAMPLERS = [ - NanMinMaxDownsampler(), + 
NaNMinMaxDownsampler(), NaNM4Downsampler(), NaNMinMaxLTTBDownsampler(), ] @@ -367,7 +367,7 @@ def test_nan_minmax_downsampler(): """Test NaN downsamplers.""" arr = np.random.randn(50_000) arr[::5] = np.nan - s_downsampled = NanMinMaxDownsampler().downsample(arr, n_out=100) + s_downsampled = NaNMinMaxDownsampler().downsample(arr, n_out=100) arr_downsampled = arr[s_downsampled] assert np.all(np.isnan(arr_downsampled)) diff --git a/tsdownsample/__init__.py b/tsdownsample/__init__.py index 9de8d61..1d91c57 100644 --- a/tsdownsample/__init__.py +++ b/tsdownsample/__init__.py @@ -7,7 +7,7 @@ MinMaxDownsampler, MinMaxLTTBDownsampler, NaNM4Downsampler, - NanMinMaxDownsampler, + NaNMinMaxDownsampler, NaNMinMaxLTTBDownsampler, ) @@ -20,7 +20,7 @@ "M4Downsampler", "LTTBDownsampler", "MinMaxLTTBDownsampler", - "NanMinMaxDownsampler", + "NaNMinMaxDownsampler", "NaNM4Downsampler", "NaNMinMaxLTTBDownsampler", ] diff --git a/tsdownsample/downsamplers.py b/tsdownsample/downsamplers.py index 32796f0..93519b7 100644 --- a/tsdownsample/downsamplers.py +++ b/tsdownsample/downsamplers.py @@ -31,7 +31,7 @@ def _check_valid_n_out(n_out: int): raise ValueError("n_out must be even") -class NanMinMaxDownsampler(AbstractRustNaNDownsampler): +class NaNMinMaxDownsampler(AbstractRustNaNDownsampler): """Downsampler that uses the MinMax algorithm. If the y data contains NaNs, the indices of these NaNs are returned. 
From 62b048907c149efd110edf9e44d91199d61d657c Mon Sep 17 00:00:00 2001 From: jvdd Date: Wed, 24 Jan 2024 09:48:58 +0100 Subject: [PATCH 21/26] :detective: benchmark NaN downsamplers --- tests/benchmarks/test_downsamplers.py | 102 ++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/tests/benchmarks/test_downsamplers.py b/tests/benchmarks/test_downsamplers.py index e257487..c7ca255 100644 --- a/tests/benchmarks/test_downsamplers.py +++ b/tests/benchmarks/test_downsamplers.py @@ -7,6 +7,9 @@ M4Downsampler, MinMaxDownsampler, MinMaxLTTBDownsampler, + NaNM4Downsampler, + NaNMinMaxDownsampler, + NaNMinMaxLTTBDownsampler, ) NB_SAMPLES = ["100,000", "1,000,000"] @@ -52,6 +55,39 @@ def test_minmax_with_x(benchmark, n_samples, n_out, dtype, parallel): benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) +@pytest.mark.benchmark(group="nanminmax") +@pytest.mark.parametrize("n_samples", NB_SAMPLES) +@pytest.mark.parametrize("n_out", N_OUT) +@pytest.mark.parametrize("dtype", Y_DTYPES) +@pytest.mark.parametrize("parallel", [False, True]) +def test_nanminmax_no_x(benchmark, n_samples, n_out, dtype, parallel): + """Test the NaNMinMaxDownsampler.""" + downsampler = NaNMinMaxDownsampler() + n_samples = int(n_samples.replace(",", "")) + n_out = int(n_out.replace(",", "")) + + y = np.random.randn(n_samples).astype(dtype) + + benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) + + +@pytest.mark.benchmark(group="nanminmax") +@pytest.mark.parametrize("n_samples", NB_SAMPLES) +@pytest.mark.parametrize("n_out", N_OUT) +@pytest.mark.parametrize("dtype", Y_DTYPES) +@pytest.mark.parametrize("parallel", [False, True]) +def test_nanminmax_with_x(benchmark, n_samples, n_out, dtype, parallel): + """Test the NaNMinMaxDownsampler.""" + downsampler = NaNMinMaxDownsampler() + n_samples = int(n_samples.replace(",", "")) + n_out = int(n_out.replace(",", "")) + + x = np.arange(n_samples) + y = np.random.randn(n_samples).astype(dtype) +
benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) + + # --------------------------------------------------------------------------- # # M4Downsampler # --------------------------------------------------------------------------- # @@ -90,6 +126,39 @@ def test_m4_with_x(benchmark, n_samples, n_out, dtype, parallel): benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) +@pytest.mark.benchmark(group="nanm4") +@pytest.mark.parametrize("n_samples", NB_SAMPLES) +@pytest.mark.parametrize("n_out", N_OUT) +@pytest.mark.parametrize("dtype", Y_DTYPES) +@pytest.mark.parametrize("parallel", [False, True]) +def test_nanm4_no_x(benchmark, n_samples, n_out, dtype, parallel): + """Test the NaNM4Downsampler.""" + downsampler = NaNM4Downsampler() + n_samples = int(n_samples.replace(",", "")) + n_out = int(n_out.replace(",", "")) + + y = np.random.randn(n_samples).astype(dtype) + + benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) + + +@pytest.mark.benchmark(group="nanm4") +@pytest.mark.parametrize("n_samples", NB_SAMPLES) +@pytest.mark.parametrize("n_out", N_OUT) +@pytest.mark.parametrize("dtype", Y_DTYPES) +@pytest.mark.parametrize("parallel", [False, True]) +def test_nanm4_with_x(benchmark, n_samples, n_out, dtype, parallel): + """Test the NaNM4Downsampler.""" + downsampler = NaNM4Downsampler() + n_samples = int(n_samples.replace(",", "")) + n_out = int(n_out.replace(",", "")) + + x = np.arange(n_samples) + y = np.random.randn(n_samples).astype(dtype) + + benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) + + # --------------------------------------------------------------------------- # # LTTBDownsampler # --------------------------------------------------------------------------- # @@ -166,6 +235,39 @@ def test_minmaxlttb_with_x(benchmark, n_samples, n_out, dtype, parallel): benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) +@pytest.mark.benchmark(group="nanminmaxlttb")
+@pytest.mark.parametrize("n_samples", NB_SAMPLES) +@pytest.mark.parametrize("n_out", N_OUT) +@pytest.mark.parametrize("dtype", Y_DTYPES) +@pytest.mark.parametrize("parallel", [False, True]) +def test_nanminmaxlttb_no_x(benchmark, n_samples, n_out, dtype, parallel): + """Test the NaNMinMaxLTTBDownsampler.""" + downsampler = NaNMinMaxLTTBDownsampler() + n_samples = int(n_samples.replace(",", "")) + n_out = int(n_out.replace(",", "")) + + y = np.random.randn(n_samples).astype(dtype) + + benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) + + +@pytest.mark.benchmark(group="nanminmaxlttb") +@pytest.mark.parametrize("n_samples", NB_SAMPLES) +@pytest.mark.parametrize("n_out", N_OUT) +@pytest.mark.parametrize("dtype", Y_DTYPES) +@pytest.mark.parametrize("parallel", [False, True]) +def test_nanminmaxlttb_with_x(benchmark, n_samples, n_out, dtype, parallel): + """Test the NaNMinMaxLTTBDownsampler.""" + downsampler = NaNMinMaxLTTBDownsampler() + n_samples = int(n_samples.replace(",", "")) + n_out = int(n_out.replace(",", "")) + + x = np.arange(n_samples) + y = np.random.randn(n_samples).astype(dtype) + + benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) + + # --------------------------------------------------------------------------- # # EveryNthDownsampler # --------------------------------------------------------------------------- # From 401338cf92b90ae5efa390d40af73644e3d58318 Mon Sep 17 00:00:00 2001 From: jvdd Date: Thu, 25 Jan 2024 10:46:39 +0100 Subject: [PATCH 22/26] :broom: --- downsample_rs/src/minmaxlttb.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/downsample_rs/src/minmaxlttb.rs b/downsample_rs/src/minmaxlttb.rs index 959694b..60cc796 100644 --- a/downsample_rs/src/minmaxlttb.rs +++ b/downsample_rs/src/minmaxlttb.rs @@ -129,10 +129,7 @@ pub(crate) fn minmaxlttb_generic, Ty: Num + AsPrimiti n_out: usize, minmax_ratio: usize, f_minmax: fn(&[Tx], &[Ty], usize) -> Vec, -) -> Vec -where - // for<'a>
&'a [Ty]: ArgMinMax, -{ +) -> Vec { assert_eq!(x.len(), y.len()); assert!(minmax_ratio > 1); // Apply first min max aggregation (if above ratio) @@ -179,10 +176,7 @@ pub(crate) fn minmaxlttb_generic_without_x>( n_out: usize, minmax_ratio: usize, f_minmax: fn(&[Ty], usize) -> Vec, -) -> Vec -where - // for<'a> &'a [Ty]: ArgMinMax, -{ +) -> Vec { assert!(minmax_ratio > 1); // Apply first min max aggregation (if above ratio) if y.len() / n_out > minmax_ratio { From 6aca12a21156274ad33282eabfefc178cfbfa87b Mon Sep 17 00:00:00 2001 From: jvdd Date: Thu, 25 Jan 2024 10:51:13 +0100 Subject: [PATCH 23/26] :broom: --- src/lib.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 213578f..9348e01 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -113,11 +113,6 @@ macro_rules! _create_pyfunc_with_x_with_ratio { } macro_rules! _create_pyfuncs_with_x_generic { - // ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($t:ty)+) => { - // // The macro will implement the function for all combinations of $t (for type x and y). - // // (duplicate the list of types to iterate over all combinations) - // _create_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($t)+; $($t),+); - // }; ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+, $($ty:ty)+) => { // The macro will implement the function for all combinations of $tx and $ty (for respectively type x and y). @@ -144,11 +139,6 @@ macro_rules! _create_pyfuncs_with_x_generic { // TODO: there must be a better way to combine normal and nan macros macro_rules! _create_nan_pyfuncs_with_x_generic { - // ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($t:ty)+) => { - // // The macro will implement the function for all combinations of $t (for type x and y). 
- // // (duplicate the list of types to iterate over all combinations) - // _create_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($t)+; $($t),+); - // }; ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+, $($ty:ty)+) => { // The macro will implement the function for all combinations of $tx and $ty (for respectively type x and y). From d9c9e733a71f6e65a968b22866b3ba7b1a5b54ac Mon Sep 17 00:00:00 2001 From: jvdd Date: Thu, 25 Jan 2024 17:48:40 +0100 Subject: [PATCH 24/26] :broom: limit duplicate code --- tests/test_config.py | 1 - tests/test_tsdownsample.py | 30 +----- tsdownsample/downsampling_interface.py | 144 ++++++++++--------------- 3 files changed, 62 insertions(+), 113 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index e1a6d31..f7632a7 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -17,7 +17,6 @@ supported_dtypes_x = _core_supported_dtypes supported_dtypes_y = _core_supported_dtypes + [np.float16, np.int8, np.uint8, np.bool_] -supported_dtypes_y_nan = [np.float16, np.float32, np.float64] _core_rust_primitive_types = ["f32", "f64", "i16", "i32", "i64", "u16", "u32", "u64"] diff --git a/tests/test_tsdownsample.py b/tests/test_tsdownsample.py index fb7c4af..45e23e3 100644 --- a/tests/test_tsdownsample.py +++ b/tests/test_tsdownsample.py @@ -2,7 +2,7 @@ import numpy as np import pytest -from test_config import supported_dtypes_x, supported_dtypes_y, supported_dtypes_y_nan +from test_config import supported_dtypes_x, supported_dtypes_y from tsdownsample import ( # MeanDownsampler,; MedianDownsampler, EveryNthDownsampler, @@ -185,12 +185,7 @@ def test_downsampling_different_dtypes(downsampler: AbstractDownsampler): """Test downsampling with different data types.""" arr_orig = np.random.randint(0, 100, size=10_000) res = [] - y_dtypes = ( - supported_dtypes_y_nan - if is_nan_downsampler(downsampler) - else supported_dtypes_y - ) - for dtype_y in 
y_dtypes: + for dtype_y in supported_dtypes_y: arr = arr_orig.astype(dtype_y) s_downsampled = downsampler.downsample(arr, n_out=100) if dtype_y is not np.bool_: @@ -204,15 +199,10 @@ def test_downsampling_different_dtypes_with_x(downsampler: AbstractDownsampler): """Test downsampling with x with different data types.""" arr_orig = np.random.randint(0, 100, size=10_000) idx_orig = np.arange(len(arr_orig)) - y_dtypes = ( - supported_dtypes_y_nan - if is_nan_downsampler(downsampler) - else supported_dtypes_y - ) for dtype_x in supported_dtypes_x: res = [] idx = idx_orig.astype(dtype_x) - for dtype_y in y_dtypes: + for dtype_y in supported_dtypes_y: arr = arr_orig.astype(dtype_y) s_downsampled = downsampler.downsample(idx, arr, n_out=100) if dtype_y is not np.bool_: @@ -228,12 +218,7 @@ def test_downsampling_no_out_of_bounds_different_dtypes( """Test no out of bounds issues when downsampling with different data types.""" arr_orig = np.random.randint(0, 100, size=100) res = [] - y_dtypes = ( - supported_dtypes_y_nan - if is_nan_downsampler(downsampler) - else supported_dtypes_y - ) - for dtype in y_dtypes: + for dtype in supported_dtypes_y: arr = arr_orig.astype(dtype) s_downsampled = downsampler.downsample(arr, n_out=76) s_downsampled_p = downsampler.downsample(arr, n_out=76, parallel=True) @@ -251,15 +236,10 @@ def test_downsampling_no_out_of_bounds_different_dtypes_with_x( """Test no out of bounds issues when downsampling with different data types.""" arr_orig = np.random.randint(0, 100, size=100) idx_orig = np.arange(len(arr_orig)) - y_dtypes = ( - supported_dtypes_y_nan - if is_nan_downsampler(downsampler) - else supported_dtypes_y - ) for dtype_x in supported_dtypes_x: res = [] idx = idx_orig.astype(dtype_x) - for dtype_y in y_dtypes: + for dtype_y in supported_dtypes_y: arr = arr_orig.astype(dtype_y) s_downsampled = downsampler.downsample(idx, arr, n_out=76) s_downsampled_p = downsampler.downsample(idx, arr, n_out=76, parallel=True) diff --git 
a/tsdownsample/downsampling_interface.py b/tsdownsample/downsampling_interface.py index 81a9bc5..cac1b53 100644 --- a/tsdownsample/downsampling_interface.py +++ b/tsdownsample/downsampling_interface.py @@ -163,6 +163,11 @@ class AbstractRustDownsampler(AbstractDownsampler, ABC): def __init__(self): super().__init__(True, _rust_dtypes, _y_rust_dtypes) # same for x and y + @property + def _downsample_func_prefix(self) -> str: + """The prefix of the downsample functions in the rust module.""" + return DOWNSAMPLE_F + @property def rust_mod(self) -> ModuleType: """The compiled Rust module for the current downsampler.""" @@ -197,8 +202,32 @@ def mod_multi_core(self) -> Union[ModuleType, None]: return None # no parallel compiled module available @staticmethod + def _view_x(x: np.ndarray) -> np.ndarray: + """View the x-data as different dtype (if necessary).""" + if np.issubdtype(x.dtype, np.datetime64): + # datetime64 is viewed as int64 + return x.view(dtype=np.int64) + elif np.issubdtype(x.dtype, np.timedelta64): + # timedelta64 is viewed as int64 + return x.view(dtype=np.int64) + return x + + @staticmethod + def _view_y(y: np.ndarray) -> np.ndarray: + """View the y-data as different dtype (if necessary).""" + if y.dtype == "bool": + # bool is viewed as int8 + return y.view(dtype=np.int8) + elif np.issubdtype(y.dtype, np.datetime64): + # datetime64 is viewed as int64 + return y.view(dtype=np.int64) + elif np.issubdtype(y.dtype, np.timedelta64): + # timedelta64 is viewed as int64 + return y.view(dtype=np.int64) + return y + def _switch_mod_with_y( - y_dtype: np.dtype, mod: ModuleType, downsample_func: str = DOWNSAMPLE_F + self, y_dtype: np.dtype, mod: ModuleType, downsample_func: str = None ) -> Callable: """Select the appropriate function from the rust module for the y-data. @@ -219,6 +248,8 @@ def _switch_mod_with_y( This argument is passed from the `_switch_mod_with_x_and_y` method when the x-data is considered in the downsampling. 
""" + if downsample_func is None: + downsample_func = self._downsample_func_prefix # FLOATS if np.issubdtype(y_dtype, np.floating): if y_dtype == np.float16: @@ -252,12 +283,12 @@ def _switch_mod_with_y( # BOOLS -> int8 (bool is viewed as int8) raise ValueError(f"Unsupported data type (for y): {y_dtype}") - @staticmethod def _switch_mod_with_x_and_y( + self, # necessary to access the class its _switch_mod_with_y method x_dtype: np.dtype, y_dtype: np.dtype, mod: ModuleType, - downsample_func: str = DOWNSAMPLE_F, + downsample_func: str = None, ) -> Callable: """The x-data is considered in the downsampling @@ -274,48 +305,32 @@ def _switch_mod_with_x_and_y( downsample_func : str, optional The name of the function to use, by default DOWNSAMPLE_FUNC. """ + if downsample_func is None: + downsample_func = self._downsample_func_prefix # FLOATS if np.issubdtype(x_dtype, np.floating): if x_dtype == np.float16: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{downsample_func}_f16" - ) + return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_f16") elif x_dtype == np.float32: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{downsample_func}_f32" - ) + return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_f32") elif x_dtype == np.float64: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{downsample_func}_f64" - ) + return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_f64") # UINTS elif np.issubdtype(x_dtype, np.unsignedinteger): if x_dtype == np.uint16: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{downsample_func}_u16" - ) + return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_u16") elif x_dtype == np.uint32: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{downsample_func}_u32" - ) + return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_u32") elif x_dtype == np.uint64: - return 
AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{downsample_func}_u64" - ) + return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_u64") # INTS (need to be last because uint is subdtype of int) elif np.issubdtype(x_dtype, np.integer): if x_dtype == np.int16: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{downsample_func}_i16" - ) + return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_i16") elif x_dtype == np.int32: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{downsample_func}_i32" - ) + return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_i32") elif x_dtype == np.int64: - return AbstractRustDownsampler._switch_mod_with_y( - y_dtype, mod, f"{downsample_func}_i64" - ) + return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_i64") # DATETIME -> i64 (datetime64 is viewed as int64) # TIMEDELTA -> i64 (timedelta64 is viewed as int64) raise ValueError(f"Unsupported data type (for x): {x_dtype}") @@ -340,29 +355,14 @@ def _downsample( else: mod = self.mod_multi_core ## Viewing the y-data as different dtype (if necessary) - if y.dtype == "bool": - # bool is viewed as int8 - y = y.view(dtype=np.int8) - elif np.issubdtype(y.dtype, np.datetime64): - # datetime64 is viewed as int64 - y = y.view(dtype=np.int64) - elif np.issubdtype(y.dtype, np.timedelta64): - # timedelta64 is viewed as int64 - y = y.view(dtype=np.int64) + y = self._view_y(y) ## Viewing the x-data as different dtype (if necessary) if x is None: downsample_f = self._switch_mod_with_y(y.dtype, mod) return downsample_f(y, n_out, **kwargs) - elif np.issubdtype(x.dtype, np.datetime64): - # datetime64 is viewed as int64 - x = x.view(dtype=np.int64) - elif np.issubdtype(x.dtype, np.timedelta64): - # timedelta64 is viewed as int64 - x = x.view(dtype=np.int64) + x = self._view_x(x) ## Getting the appropriate downsample function - downsample_f = self._switch_mod_with_x_and_y( - x.dtype, y.dtype, mod, DOWNSAMPLE_F 
- ) + downsample_f = self._switch_mod_with_x_and_y(x.dtype, y.dtype, mod) return downsample_f(x, y, n_out, **kwargs) def downsample(self, *args, n_out: int, parallel: bool = False, **kwargs): @@ -389,16 +389,19 @@ def __deepcopy__(self, memo): return result -_nan_y_rust_dtypes = ["float16", "float32", "float64"] NAN_DOWNSAMPLE_F = "downsample_nan" class AbstractRustNaNDownsampler(AbstractRustDownsampler, ABC): """RustNaNDownsampler interface-class, subclassed by concrete downsamplers.""" - @staticmethod + @property + def _downsample_func_prefix(self) -> str: + """The prefix of the downsample functions in the rust module.""" + return NAN_DOWNSAMPLE_F + def _switch_mod_with_y( - y_dtype: np.dtype, mod: ModuleType, downsample_func: str = NAN_DOWNSAMPLE_F + self, y_dtype: np.dtype, mod: ModuleType, downsample_func: str = None ) -> Callable: """Select the appropriate function from the rust module for the y-data. @@ -419,44 +422,11 @@ def _switch_mod_with_y( This argument is passed from the `_switch_mod_with_x_and_y` method when the x-data is considered in the downsampling. """ + if downsample_func is None: + downsample_func = self._downsample_func_prefix if not np.issubdtype(y_dtype, np.floating): # When y is not a float, we need to remove the _nan suffix to use the # regular downsample function as the _nan suffix is only used for floats. # (Note that NaNs only exist for floats) downsample_func = downsample_func.replace("_nan", "") - return AbstractRustDownsampler._switch_mod_with_y(y_dtype, mod, downsample_func) - - def _downsample( - self, - x: Union[np.ndarray, None], - y: np.ndarray, - n_out: int, - parallel: bool = False, - **kwargs, - ) -> np.ndarray: - """Downsample the data in x and y.""" - mod = self.mod_single_core - if parallel: - if self.mod_multi_core is None: - name = self.__class__.__name__ - warnings.warn( - f"No parallel implementation available for {name}. " - "Falling back to single-core implementation." 
- ) - else: - mod = self.mod_multi_core - ## Viewing the x-data as different dtype (if necessary) - if x is None: - downsample_f = self._switch_mod_with_y(y.dtype, mod, NAN_DOWNSAMPLE_F) - return downsample_f(y, n_out, **kwargs) - elif np.issubdtype(x.dtype, np.datetime64): - # datetime64 is viewed as int64 - x = x.view(dtype=np.int64) - elif np.issubdtype(x.dtype, np.timedelta64): - # timedelta64 is viewed as int64 - x = x.view(dtype=np.int64) - ## Getting the appropriate downsample function - downsample_f = self._switch_mod_with_x_and_y( - x.dtype, y.dtype, mod, NAN_DOWNSAMPLE_F - ) - return downsample_f(x, y, n_out, **kwargs) + return super()._switch_mod_with_y(y_dtype, mod, downsample_func) From 39a7ab72509b454f1bd239af242364d84d71119c Mon Sep 17 00:00:00 2001 From: jvdd Date: Thu, 25 Jan 2024 17:52:39 +0100 Subject: [PATCH 25/26] :see_no_evil: fix linting --- tsdownsample/downsampling_interface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tsdownsample/downsampling_interface.py b/tsdownsample/downsampling_interface.py index cac1b53..9c05c54 100644 --- a/tsdownsample/downsampling_interface.py +++ b/tsdownsample/downsampling_interface.py @@ -227,7 +227,7 @@ def _view_y(y: np.ndarray) -> np.ndarray: return y def _switch_mod_with_y( - self, y_dtype: np.dtype, mod: ModuleType, downsample_func: str = None + self, y_dtype: np.dtype, mod: ModuleType, downsample_func: Optional[str] = None ) -> Callable: """Select the appropriate function from the rust module for the y-data. 
@@ -288,7 +288,7 @@ def _switch_mod_with_x_and_y( x_dtype: np.dtype, y_dtype: np.dtype, mod: ModuleType, - downsample_func: str = None, + downsample_func: Optional[str] = None, ) -> Callable: """The x-data is considered in the downsampling @@ -401,7 +401,7 @@ def _downsample_func_prefix(self) -> str: return NAN_DOWNSAMPLE_F def _switch_mod_with_y( - self, y_dtype: np.dtype, mod: ModuleType, downsample_func: str = None + self, y_dtype: np.dtype, mod: ModuleType, downsample_func: Optional[str] = None ) -> Callable: """Select the appropriate function from the rust module for the y-data. From 7a7cfd3254ec4e0c8504e006eb996c83c21338a4 Mon Sep 17 00:00:00 2001 From: jvdd Date: Fri, 2 Feb 2024 09:38:06 +0100 Subject: [PATCH 26/26] :broom: --- tests/test_tsdownsample.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/test_tsdownsample.py b/tests/test_tsdownsample.py index 45e23e3..993faa6 100644 --- a/tests/test_tsdownsample.py +++ b/tests/test_tsdownsample.py @@ -54,18 +54,9 @@ def generate_all_downsamplers() -> Iterable[AbstractDownsampler]: yield downsampler -def is_nan_downsampler(obj): - return obj.__class__.__name__ in [ - x.__class__.__name__ for x in RUST_NAN_DOWNSAMPLERS - ] - - -def generate_datapoints(obj): +def generate_datapoints(): N_DATAPOINTS = 10_000 - if is_nan_downsampler(obj): - return np.arange(N_DATAPOINTS, dtype=np.float64) - else: - return np.arange(N_DATAPOINTS) + return np.arange(N_DATAPOINTS) def generate_nan_datapoints(): @@ -84,7 +75,7 @@ def test_serialization_copy(downsampler: AbstractDownsampler): dc = copy(downsampler) ddc = deepcopy(downsampler) - arr = generate_datapoints(downsampler) + arr = generate_datapoints() orig_downsampled = downsampler.downsample(arr, n_out=100) dc_downsampled = dc.downsample(arr, n_out=100) @@ -100,7 +91,7 @@ def test_serialization_pickle(downsampler: AbstractDownsampler): dc = pickle.loads(pickle.dumps(downsampler)) - arr = generate_datapoints(downsampler) + arr = 
generate_datapoints() orig_downsampled = downsampler.downsample(arr, n_out=100) dc_downsampled = dc.downsample(arr, n_out=100) assert np.all(orig_downsampled == dc_downsampled) @@ -109,7 +100,7 @@ def test_serialization_pickle(downsampler: AbstractDownsampler): @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) def test_rust_downsampler(downsampler: AbstractDownsampler): """Test the Rust downsamplers.""" - arr = generate_datapoints(downsampler) + arr = generate_datapoints() s_downsampled = downsampler.downsample(arr, n_out=100) assert s_downsampled[0] == 0 assert s_downsampled[-1] == len(arr) - 1