Skip to content

Commit

Permalink
Use f64::mul_add if the std feature is enabled
Browse files Browse the repository at this point in the history
- May give better performance on some target architectures
  • Loading branch information
ajtribick committed Nov 24, 2024
1 parent bde0398 commit 3e96bbd
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 12 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## Version 0.8.1

* Re-enable use of `std::mul_add` if the `std` feature is enabled (except on
MinGW).

## Version 0.8

* Always use libm functions.
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "twofloat"
version = "0.8.0"
version = "0.8.1"
authors = ["Andrew Tribick", "Individual contributors"]
keywords = ["float", "precision", "numerics", "floating-point", "arithmetic"]
categories = ["algorithms", "mathematics", "science"]
Expand Down
31 changes: 22 additions & 9 deletions src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,19 @@ use core::ops::{

use crate::TwoFloat;

// MinGW FMA seems to be inaccurate, use libm even if std is enabled.
#[cfg(all(feature = "std", not(all(windows, target_env = "gnu"))))]
#[inline(always)]
fn fma(x: f64, y: f64, z: f64) -> f64 {
f64::mul_add(x, y, z)
}

#[cfg(not(all(feature = "std", not(all(windows, target_env = "gnu")))))]
#[inline(always)]
fn fma(x: f64, y: f64, z: f64) -> f64 {
libm::fma(x, y, z)
}

pub(crate) fn fast_two_sum(a: f64, b: f64) -> TwoFloat {
// Joldes et al. (2017) Algorithm 1
let s = a + b;
Expand Down Expand Up @@ -43,7 +56,7 @@ impl TwoFloat {
let p = a * b;
Self {
hi: p,
lo: libm::fma(a, b, -p),
lo: fma(a, b, -p),
}
}

Expand Down Expand Up @@ -128,15 +141,15 @@ binary_ops! {
/// (2017) Algorithm 9.
fn Mul::mul<'a, 'b>(self: &'a TwoFloat, rhs: &'b f64) -> TwoFloat {
let (ch, cl1) = TwoFloat::new_mul(self.hi, *rhs).into();
let cl3 = libm::fma(self.lo, *rhs, cl1);
let cl3 = fma(self.lo, *rhs, cl1);
fast_two_sum(ch, cl3)
}

/// Implements multiplication of `TwoFloat` and `f64` using Joldes et al.
/// (2017) Algorithm 9.
fn Mul::mul<'a, 'b>(self: &'a f64, rhs: &'b TwoFloat) -> TwoFloat {
let (ch, cl1) = TwoFloat::new_mul(rhs.hi, *self).into();
let cl3 = libm::fma(rhs.lo, *self, cl1);
let cl3 = fma(rhs.lo, *self, cl1);
fast_two_sum(ch, cl3)
}

Expand All @@ -145,8 +158,8 @@ binary_ops! {
fn Mul::mul<'a, 'b>(self: &'a TwoFloat, rhs: &'b TwoFloat) -> TwoFloat {
let (ch, cl1) = TwoFloat::new_mul(self.hi, rhs.hi).into();
let tl0 = self.lo * rhs.lo;
let tl1 = libm::fma(self.hi, rhs.lo, tl0);
let cl2 = libm::fma(self.lo, rhs.hi, tl1);
let tl1 = fma(self.hi, rhs.lo, tl0);
let cl2 = fma(self.lo, rhs.hi, tl1);
let cl3 = cl1 + cl2;
fast_two_sum(ch, cl3)
}
Expand Down Expand Up @@ -175,7 +188,7 @@ binary_ops! {
let d = e * th;
let m = d + th;
let (ch, cl1) = TwoFloat::new_mul(m.hi, *self).into();
let cl3 = libm::fma(m.lo, *self, cl1);
let cl3 = fma(m.lo, *self, cl1);
fast_two_sum(ch, cl3)
}

Expand Down Expand Up @@ -253,7 +266,7 @@ assign_ops! {
/// (2017) Algorithm 9.
fn MulAssign::mul_assign<'a>(self: &mut TwoFloat, rhs: &'a f64) {
let (ch, cl1) = TwoFloat::new_mul(self.hi, *rhs).into();
let cl3 = libm::fma(self.lo, *rhs, cl1);
let cl3 = fma(self.lo, *rhs, cl1);
*self = fast_two_sum(ch, cl3);
}

Expand All @@ -262,8 +275,8 @@ assign_ops! {
fn MulAssign::mul_assign<'a>(self: &mut TwoFloat, rhs: &'a TwoFloat) {
let (ch, cl1) = TwoFloat::new_mul(self.hi, rhs.hi).into();
let tl0 = self.lo * rhs.lo;
let tl1 = libm::fma(self.hi, rhs.lo, tl0);
let cl2 = libm::fma(self.lo, rhs.hi, tl1);
let tl1 = fma(self.hi, rhs.lo, tl0);
let cl2 = fma(self.lo, rhs.hi, tl1);
let cl3 = cl1 + cl2;
*self = fast_two_sum(ch, cl3)
}
Expand Down
8 changes: 7 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,19 @@ as preliminary.
Operations on non-finite values are not supported. At the moment this is not
automatically checked. The `is_valid()` method is provided for this purpose.
If the `std` feature is enabled (as it is by default), the fused multiply-add
operation from the standard library is used. This *may* be more performant if
the target architecture has a dedicated instruction for this. See the
documentation of [`f64::mul_add`] for details. Otherwise the libm
implementation is used.
If the `serde` feature is enabled, serialization and deserialization is
possible through the Serde library.
## Known issues
* The MinGW `fma` implementation appears to give incorrect results in some
cases, so the libm implementation is always used on this platform.
cases, so the libm function is always used on this platform.
## References
Expand Down
2 changes: 1 addition & 1 deletion tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use twofloat::{TwoFloat, TwoFloatError};
// This runs substantially slower than the other CI targets, so reduce the
// number of iterations
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
const TEST_ITERS: usize = 1000;
const TEST_ITERS: usize = 10000;

#[cfg(not(all(target_arch = "aarch64", target_os = "linux")))]
const TEST_ITERS: usize = 100000;
Expand Down

0 comments on commit 3e96bbd

Please sign in to comment.