Skip to content

Commit

Permalink
Reformat
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Dec 19, 2024
1 parent 894d4a0 commit cfd026f
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 62 deletions.
85 changes: 43 additions & 42 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ YUV 8 bit-depth conversion
`aarch64` tested on Mac Pro M3.

AVX2 tests performed on `Digital Ocean Shared Premium Intel 2 vCPU` droplet.
AVX2 Win tests performed on Windows 11 with an Intel Core i9-14900HX.

```bash
cargo bench --bench yuv8 --manifest-path ./app/Cargo.toml
Expand All @@ -87,31 +88,31 @@ cargo +nightly bench --bench yuv8 --manifest-path ./app/Cargo.toml --features ni

### Encoding 8-bit

| Conversion | time(NEON) | Time(AVX2) | Time(AVX-512) |
|------------------------|:----------:|:----------:|:-------------:|
| utils RGB->YUV 4:2:0 | 3.16ms | 16.28ms | 8.93ms |
| libyuv RGB->YUV 4:2:0 | 3.58ms | 17.64ms | 12.83ms |
| utils RGBA->YUV 4:2:0 | 4.04ms | 12.63ms | 10.27ms |
| libyuv RGBA->YUV 4:2:0 | 4.87ms | 11.27ms | 10.73ms |
| utils RGBA->YUV 4:2:2 | 4.34ms | 24.02ms | 18.53ms |
| libyuv RGBA->YUV 4:2:2 | 5.90ms | 19.43ms | 17.94ms |
| utils RGBA->YUV 4:4:4 | 4.49ms | 29.18ms | 22.08ms |
| Conversion | Time(NEON) | Time(AVX2 Win) | Time(AVX2) | Time(AVX-512) |
|------------------------|:----------:|:--------------:|:----------:|:-------------:|
| utils RGB->YUV 4:2:0 | 3.16ms | 5.54ms | 16.28ms | 8.93ms |
| libyuv RGB->YUV 4:2:0 | 3.58ms | 34.30ms | 17.64ms | 12.83ms |
| utils RGBA->YUV 4:2:0 | 4.04ms | 5.78ms | 12.63ms | 10.27ms |
| libyuv RGBA->YUV 4:2:0 | 4.87ms | 25.29ms | 11.27ms | 10.73ms |
| utils RGBA->YUV 4:2:2 | 4.34ms | 7.35ms | 24.02ms | 18.53ms |
| libyuv RGBA->YUV 4:2:2 | 5.90ms | 37.65ms | 19.43ms | 17.94ms |
| utils RGBA->YUV 4:4:4 | 4.49ms | 8.97ms | 29.18ms | 22.08ms |

### Decoding 8-bit

| Conversion | time(NEON) | Time(AVX2) | Time(AVX-512) |
|------------------------|:----------:|:----------:|:-------------:|
| utils YUV NV12->RGBA | 3.91ms | 22.59ms | 18.55ms |
| utils YUV NV12->RGB | 3.28ms | 17.56ms | 13.64ms |
| libyuv YUV NV12->RGB | 5.20ms | 22.27ms | 18.55ms |
| utils YUV 4:2:0->RGB | 3.15ms | 17.69ms | 13.70ms |
| libyuv YUV 4:2:0->RGB | 5.70ms | 23.91ms | 20.07ms |
| utils YUV 4:2:0->RGBA | 3.70ms | 20.81ms | 18.84ms |
| libyuv YUV 4:2:0->RGBA | 6.13ms | 24.32ms | 18.50ms |
| utils YUV 4:2:2->RGBA | 4.05ms | 24.44ms | 22.05ms |
| libyuv YUV 4:2:2->RGBA | 5.91ms | 23.72ms | 18.71ms |
| utils YUV 4:4:4->RGBA | 3.91ms | 27.58ms | 22.85ms |
| libyuv YUV 4:4:4->RGBA | 4.82ms | 34.60ms | 21.47ms |
| Conversion | Time(NEON) | Time(AVX2 Win) | Time(AVX2) | Time(AVX-512) |
|------------------------|:----------:|:--------------:|:----------:|:-------------:|
| utils YUV NV12->RGBA | 3.91ms | 5.15ms | 22.59ms | 18.55ms |
| utils YUV NV12->RGB | 3.28ms | 6.71ms | 17.56ms | 13.64ms |
| libyuv YUV NV12->RGB | 5.20ms | 50.16ms | 22.27ms | 18.55ms |
| utils YUV 4:2:0->RGB | 3.15ms | 5.15ms | 17.69ms | 13.70ms |
| libyuv YUV 4:2:0->RGB | 5.70ms | 48.52ms | 23.91ms | 20.07ms |
| utils YUV 4:2:0->RGBA | 3.70ms | 6.70ms | 20.81ms | 18.84ms |
| libyuv YUV 4:2:0->RGBA | 6.13ms | 7.20ms | 24.32ms | 18.50ms |
| utils YUV 4:2:2->RGBA | 4.05ms | 7.61ms | 24.44ms | 22.05ms |
| libyuv YUV 4:2:2->RGBA | 5.91ms | 7.48ms | 23.72ms | 18.71ms |
| utils YUV 4:4:4->RGBA | 3.91ms | 7.65ms | 27.58ms | 22.85ms |
| libyuv YUV 4:4:4->RGBA | 4.82ms | 7.55ms | 34.60ms | 21.47ms |

YUV 16 bit-depth conversion

Expand All @@ -123,29 +124,29 @@ cargo bench --bench yuv16 --manifest-path ./app/Cargo.toml

10-bit encoding is not implemented in `libyuv`

| | time(NEON) | Time(AVX2) |
|----------------------------|:----------:|:----------:|
| utils RGB10->YUV10 4:2:0 | 4.98ms | 33.88ms |
| libyuv RGB10->YUV10 4:2:0 | x | x |
| utils RGBA10->YUV10 4:2:0 | 6.03ms | 32.69ms |
| libyuv RGBA10->YUV10 4:2:0 | x | x |
| utils RGBA10->YUV10 4:2:2 | 5.99ms | 50.26ms |
| libyuv RGBA10->YUV10 4:2:2 | x | x |
| utils RGBA10->YUV10 4:4:4 | 4.84ms | 70.11ms |
| | Time(NEON) | Time(AVX2 Win) | Time(AVX2) |
|----------------------------|:----------:|:--------------:|:----------:|
| utils RGB10->YUV10 4:2:0 | 4.98ms | 9.13ms | 33.88ms |
| libyuv RGB10->YUV10 4:2:0 | x | x | x |
| utils RGBA10->YUV10 4:2:0 | 6.03ms | 10.82ms | 32.69ms |
| libyuv RGBA10->YUV10 4:2:0 | x | x | x |
| utils RGBA10->YUV10 4:2:2 | 5.99ms | 14.74ms | 50.26ms |
| libyuv RGBA10->YUV10 4:2:2 | x | x | x |
| utils RGBA10->YUV10 4:4:4 | 4.84ms | 16.49ms | 70.11ms |

### Decoding 10-bit

| | time(NEON) | Time(AVX2) |
|----------------------------|:----------:|:----------:|
| utils YUV10 4:2:0->RGB10 | 5.64ms | 45.58ms |
| libyuv YUV10 4:2:0->RGB10 | - | - |
| utils YUV10 4:2:0->RGBA10 | 6.03ms | 65.95ms |
| utils YUV10 4:2:0->RGBA8 | 6.94ms | 31.15ms |
| libyuv YUV10 4:2:0->RGBA8 | 12.39ms | 24.59ms |
| utils YUV10 4:2:2->RGBA10 | 5.88ms | 59.44ms |
| utils YUV10 4:2:2->RGBA8 | 7.33ms | 29.15ms |
| libyuv YUV10 4:2:2->RGBA10 | 12.40ms | 29.96ms |
| utils YUV10 4:4:4->RGBA10 | 6.01ms | 70.84ms |
| | Time(NEON) | Time(AVX2 Win) | Time(AVX2) |
|---------------------------|:----------:|:--------------:|:----------:|
| utils YUV10 4:2:0->RGB10 | 5.64ms | 11.06ms | 45.58ms |
| libyuv YUV10 4:2:0->RGB10 | - | - | - |
| utils YUV10 4:2:0->RGBA10 | 6.03ms | 14.85ms | 65.95ms |
| utils YUV10 4:2:0->RGBA8 | 6.94ms | 8.77ms | 31.15ms |
| libyuv YUV10 4:2:0->RGBA8 | 12.39ms | 62.01ms | 24.59ms |
| utils YUV10 4:2:2->RGBA10 | 5.88ms | 15.92ms | 59.44ms |
| utils YUV10 4:2:2->RGBA8 | 7.33ms | 8.76ms | 29.15ms |
| libyuv YUV10 4:2:2->RGBA8 | 12.40ms | 61.28ms | 29.96ms |
| utils YUV10 4:4:4->RGBA10 | 6.01ms | 16.09ms | 70.84ms |

This project is licensed under either of

Expand Down
21 changes: 5 additions & 16 deletions src/avx512bw/rgba_to_yuv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,10 @@ pub(crate) fn avx512_rgba_to_yuv<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>(
start_cx: usize,
start_ux: usize,
width: usize,
compute_uv_row: bool,
) -> ProcessedOffset {
unsafe {
avx512_rgba_to_yuv_impl::<ORIGIN_CHANNELS, SAMPLING>(
transform,
range,
y_plane,
u_plane,
v_plane,
rgba,
start_cx,
start_ux,
width,
compute_uv_row,
transform, range, y_plane, u_plane, v_plane, rgba, start_cx, start_ux, width,
)
}
}
Expand All @@ -78,7 +68,6 @@ unsafe fn avx512_rgba_to_yuv_impl<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>
start_cx: usize,
start_ux: usize,
width: usize,
compute_uv_row: bool,
) -> ProcessedOffset {
let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into();
let source_channels: YuvSourceChannels = ORIGIN_CHANNELS.into();
Expand Down Expand Up @@ -238,11 +227,11 @@ unsafe fn avx512_rgba_to_yuv_impl<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>
_mm512_storeu_si512(v_ptr.add(uv_x) as *mut i32, cr);
uv_x += 64;
} else if chroma_subsampling == YuvChromaSubsampling::Yuv422
|| (chroma_subsampling == YuvChromaSubsampling::Yuv420 && compute_uv_row)
|| (chroma_subsampling == YuvChromaSubsampling::Yuv420)
{
let r1 = avx512_pairwise_avg_epi8(r_values);
let g1 = avx512_pairwise_avg_epi8(g_values);
let b1 = avx512_pairwise_avg_epi8(b_values);
let r1 = _mm512_slli_epi16::<V_SCALE>(avx512_pairwise_avg_epi8(r_values));
let g1 = _mm512_slli_epi16::<V_SCALE>(avx512_pairwise_avg_epi8(g_values));
let b1 = _mm512_slli_epi16::<V_SCALE>(avx512_pairwise_avg_epi8(b_values));

let cbk = _mm512_max_epi16(
_mm512_min_epi16(
Expand Down
7 changes: 4 additions & 3 deletions src/avx512bw/rgba_to_yuv420.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,10 @@ unsafe fn avx512_rgba_to_yuv_impl420<const ORIGIN_CHANNELS: u8>(
y_plane1.get_unchecked_mut(cx..).as_mut_ptr() as *mut i32,
y_yuv1,
);
let r1 = avx512_pairwise_avg_epi16_epi8(r_values0, r_values1);
let g1 = avx512_pairwise_avg_epi16_epi8(g_values0, g_values1);
let b1 = avx512_pairwise_avg_epi16_epi8(b_values0, b_values1);

let r1 = _mm512_slli_epi16::<V_SCALE>(avx512_pairwise_avg_epi16_epi8(r_values0, r_values1));
let g1 = _mm512_slli_epi16::<V_SCALE>(avx512_pairwise_avg_epi16_epi8(g_values0, g_values1));
let b1 = _mm512_slli_epi16::<V_SCALE>(avx512_pairwise_avg_epi16_epi8(b_values0, b_values1));

let cbk = _mm512_max_epi16(
_mm512_min_epi16(
Expand Down
1 change: 0 additions & 1 deletion src/rgba_to_yuv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ fn rgbx_to_yuv8<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>(
_offset.cx,
_offset.ux,
image.width as usize,
compute_uv_row,
);
_offset = processed_offset;
}
Expand Down

0 comments on commit cfd026f

Please sign in to comment.