Skip to content

Commit

Permalink
Re-structure architecture specific implementations
Browse files Browse the repository at this point in the history
- Add `arch` folder to group implementations by
target arch
- Add `cfg-if` to reduce headaches when reading
`cfg` compile time feature gates
- Rename `[Ii]mp` -> `[Uu]pdate`
  • Loading branch information
mcountryman committed Mar 29, 2024
1 parent cc3155d commit 08e7454
Show file tree
Hide file tree
Showing 18 changed files with 685 additions and 1,080 deletions.
198 changes: 74 additions & 124 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,6 @@ criterion = "0.3"
# competition
adler = "1.0.2"
adler32 = "1.2.0"

[dependencies]
cfg-if = "1.0.0"
54 changes: 33 additions & 21 deletions bench/variants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,57 +3,69 @@ use criterion::{
Criterion, Throughput,
};
use rand::{thread_rng, RngCore};
use simd_adler32::imp::{avx2, avx512, scalar, sse2, ssse3, wasm, Adler32Imp};
use simd_adler32::{arch::*, update::Adler32Update};

pub fn bench(c: &mut Criterion) {
let mut data = [0; 100_000];
let mut data = [1; 100_000];
let mut group = c.benchmark_group("variants");

thread_rng().fill_bytes(&mut data[..]);

if let Some(update) = avx512::get_imp() {
bench_variant(&mut group, "avx512", &data, update);
}
cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {

if let Some(update) = avx2::get_imp() {
bench_variant(&mut group, "avx2", &data, update);
}
if let Some(update) = x86::avx512::get_update_if_supported() {
bench_variant(&mut group, "avx512", &data, update);
}

if let Some(update) = ssse3::get_imp() {
bench_variant(&mut group, "ssse3", &data, update);
}
if let Some(update) = x86::avx2::get_update_if_supported() {
bench_variant(&mut group, "avx2", &data, update);
}

if let Some(update) = sse2::get_imp() {
bench_variant(&mut group, "sse2", &data, update);
}
if let Some(update) = x86::ssse3::get_update_if_supported() {
bench_variant(&mut group, "ssse3", &data, update);
}

if let Some(update) = x86::sse2::get_update_if_supported() {
bench_variant(&mut group, "sse2", &data, update);
}

} else if #[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))] {

if let Some(update) = wasm::get_update_if_supported() {
bench_variant(&mut group, "wasm", &data, update);
}

if let Some(update) = wasm::get_imp() {
bench_variant(&mut group, "wasm", &data, update);
}
}

bench_variant(&mut group, "scalar", &data, scalar::update);
}

fn bench_variant<M>(g: &mut BenchmarkGroup<M>, name: &str, data: &[u8], imp: Adler32Imp)
where
fn bench_variant<M>(
g: &mut BenchmarkGroup<M>,
name: &str,
data: &[u8],
update: Adler32Update,
) where
M: Measurement,
{
g.throughput(Throughput::Bytes(10)).bench_with_input(
format!("{}-10b", name),
&data[..10],
|b, data| b.iter(|| black_box(imp(1, 0, data))),
|b, data| b.iter(|| black_box(update(1, 0, data))),
);

g.throughput(Throughput::Bytes(10_000)).bench_with_input(
format!("{}-10k", name),
&data[..10_000],
|b, data| b.iter(|| black_box(imp(1, 0, data))),
|b, data| b.iter(|| black_box(update(1, 0, data))),
);

g.throughput(Throughput::Bytes(100_000)).bench_with_input(
format!("{}-100k", name),
&data[..100_000],
|b, data| b.iter(|| black_box(imp(1, 0, data))),
|b, data| b.iter(|| black_box(update(1, 0, data))),
);
}

Expand Down
5 changes: 5 additions & 0 deletions src/arch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/// Portable scalar implementation; compiled for every target.
pub mod scalar;
/// Compiled only when targeting `wasm32` or `wasm64`.
#[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))]
pub mod wasm;
/// Compiled only when targeting `x86` or `x86_64`.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub mod x86;
File renamed without changes.
30 changes: 30 additions & 0 deletions src/arch/scalar.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/// Modulus applied to both running sums.
const MOD: u32 = 65521;
/// Number of bytes accumulated between two modulo reductions.
const NMAX: usize = 5552;

/// Folds `data` into the running sums `a` and `b`, one byte at a time.
///
/// For every input byte, the byte is added to `a` and the updated `a` is then added to `b`.
/// Both sums are reduced modulo `MOD` after each full `NMAX`-byte chunk and once more after
/// the trailing partial chunk, so both returned values are always below `MOD`.
pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
    /// Adds every byte of `bytes` to the pair of sums without reducing them.
    fn absorb(sums: (u32, u32), bytes: &[u8]) -> (u32, u32) {
        bytes.iter().fold(sums, |(lo, hi), &byte| {
            let lo = lo.wrapping_add(u32::from(byte));
            (lo, hi.wrapping_add(lo))
        })
    }

    let mut blocks = data.chunks_exact(NMAX);
    let mut sums = (u32::from(a), u32::from(b));

    // Full-sized chunks: accumulate, then bring both sums back below `MOD`.
    for block in blocks.by_ref() {
        let (lo, hi) = absorb(sums, block);
        sums = (lo % MOD, hi % MOD);
    }

    // Whatever is left over is shorter than `NMAX` (possibly empty).
    let (lo, hi) = absorb(sums, blocks.remainder());

    ((lo % MOD) as u16, (hi % MOD) as u16)
}
86 changes: 18 additions & 68 deletions src/imp/wasm.rs → src/arch/wasm.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
use super::Adler32Imp;

/// Resolves update implementation if CPU supports simd128 instructions.
pub fn get_imp() -> Option<Adler32Imp> {
get_imp_inner()
use crate::update::Adler32Update;

/// Returns the `simd128` update function if it was compiled in.
///
/// The choice is made at compile time: `Some` when the crate is built with
/// `target_feature = "simd128"` enabled, `None` otherwise. No runtime detection is
/// performed here.
pub fn get_update_if_supported() -> Option<Adler32Update> {
cfg_if::cfg_if! {
if #[cfg(target_feature = "simd128")] {
// NOTE(review): presumably sound because this branch only exists when `simd128` is
// enabled at compile time — confirm against the safety contract of `update`.
Some(|a, b, bytes| unsafe { update(a, b, bytes) })
} else {
None
}
}
}

#[inline]
#[cfg(target_feature = "simd128")]
fn get_imp_inner() -> Option<Adler32Imp> {
Some(imp::update)
/// Forwards `a`, `b` and `data` to `imp::update` and returns its result unchanged.
///
/// # Safety
///
/// NOTE(review): the caller must uphold whatever `imp::update` requires; presumably that
/// the target actually supports `simd128` — confirm against the `imp` module in use.
#[inline]
pub unsafe fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
imp::update(a, b, data)
}

#[inline]
#[cfg(not(target_feature = "simd128"))]
fn get_imp_inner() -> Option<Adler32Imp> {
None
mod imp {
// Stand-in with the same signature as the SIMD routine: ignores every argument and
// always panics with the message below.
pub unsafe fn update(_: u16, _: u16, _: &[u8]) -> (u16, u16) {
panic!("Target platform does not support `simd128`")
}
}

#[cfg(target_feature = "simd128")]
Expand All @@ -29,13 +35,9 @@ mod imp {
#[cfg(target_arch = "wasm64")]
use core::arch::wasm64::*;

pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
update_imp(a, b, data)
}

#[inline]
#[target_feature(enable = "simd128")]
fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
pub unsafe fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
let mut a = a as u32;
let mut b = b as u32;

Expand Down Expand Up @@ -163,55 +165,3 @@ mod imp {
u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
}
}

#[cfg(test)]
mod tests {
    use rand::Rng;

    /// Longest input fed to the implementation under test.
    const MAX_LEN: usize = 512 * 1024;

    /// Prefix lengths checked for the constant-filled buffers.
    const FILL_LENS: [usize; 6] = [0, 1, 2, 100, 1024, MAX_LEN];

    #[test]
    fn zeroes() {
        let buf = [0u8; MAX_LEN];

        for &len in FILL_LENS.iter() {
            assert_sum_eq(&buf[..len]);
        }
    }

    #[test]
    fn ones() {
        let buf = [1u8; MAX_LEN];

        for &len in FILL_LENS.iter() {
            assert_sum_eq(&buf[..len]);
        }
    }

    #[test]
    fn random() {
        let mut buf = [0u8; MAX_LEN];
        rand::thread_rng().fill(&mut buf[..]);

        for &len in [1, 100, 1024, MAX_LEN].iter() {
            assert_sum_eq(&buf[..len]);
        }
    }

    /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
    #[test]
    fn wiki() {
        assert_sum_eq(b"Wikipedia");
    }

    /// Compares the resolved implementation against `adler::adler32_slice` for `data`.
    ///
    /// Does nothing when `super::get_imp` resolves no implementation.
    fn assert_sum_eq(data: &[u8]) {
        let update = match super::get_imp() {
            Some(update) => update,
            None => return,
        };

        let (a, b) = update(1, 0, data);
        let actual = (u32::from(b) << 16) | u32::from(a);
        let expected = adler::adler32_slice(data);

        assert_eq!(actual, expected, "len({})", data.len());
    }
}
41 changes: 41 additions & 0 deletions src/arch/x86.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
pub mod avx2;
pub mod avx512;
pub mod sse2;
pub mod ssse3;

/// Tests whether a CPU feature is available on x86/x86_64 platforms.
///
/// With the `std` crate feature enabled, the check is delegated to
/// `std::is_x86_feature_detected!`. Without `std`, the answer is fixed at compile time:
/// `true` when the crate is built with `target_feature = $name` enabled, `false` otherwise.
///
/// `$name` is the feature name as a string literal; it is passed both to the `std` macro
/// and to `cfg(target_feature = ...)`.
#[allow(unused_macros)]
macro_rules! is_x86_feature_detected {
($name:tt) => {{
// The three `cfg` conditions below are mutually exclusive and exhaustive, so exactly one
// definition of the helper survives and the final call always resolves.

// `std` available: defer to the standard library's detection macro.
#[cfg(feature = "std")]
#[inline(always)]
fn __is_x86_feature_detected() -> bool {
std::is_x86_feature_detected!($name)
}

// No `std`, feature enabled at compile time.
#[cfg(all(not(feature = "std"), target_feature = $name))]
#[inline(always)]
fn __is_x86_feature_detected() -> bool {
true
}

// No `std`, feature not enabled at compile time.
#[cfg(all(not(feature = "std"), not(target_feature = $name)))]
#[inline(always)]
fn __is_x86_feature_detected() -> bool {
false
}

__is_x86_feature_detected()
}};
}

pub(crate) use is_x86_feature_detected;

/// Packs four selectors into one integer: `z` is shifted left by 6 bits, `y` by 4, `x` by 2,
/// and `w` is left in place; the four values are OR-ed together and cast to `i32`.
///
/// No masking is applied, so an argument above 3 overlaps the bits of its neighbours.
#[inline]
#[allow(non_snake_case)]
pub const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 {
    let upper = (z << 6) | (y << 4);
    let lower = (x << 2) | w;

    (upper | lower) as i32
}
Loading

0 comments on commit 08e7454

Please sign in to comment.