From 2fc85bd5a42511c74426778cbba2d38534f7aade Mon Sep 17 00:00:00 2001 From: Tymur Date: Tue, 22 Oct 2024 20:37:35 +0300 Subject: [PATCH] decode: fixed the SIMD-based next_start_code conformance with rare CPU-models --- vk_video_decoder/libs/NvVideoParser/CMakeLists.txt | 10 +++++----- vk_video_decoder/libs/NvVideoParser/src/cpudetect.cpp | 9 ++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/vk_video_decoder/libs/NvVideoParser/CMakeLists.txt b/vk_video_decoder/libs/NvVideoParser/CMakeLists.txt index 5f72a150..2c885a9b 100644 --- a/vk_video_decoder/libs/NvVideoParser/CMakeLists.txt +++ b/vk_video_decoder/libs/NvVideoParser/CMakeLists.txt @@ -53,11 +53,11 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64") if(WIN32) set(GENERIC_CPU_FEATURE "/arch:armv8.0+nosimd") set(NEON_CPU_FEATURE "/arch:armv8.0") - set(SVE_CPU_FEATURE "/arch:armv8.2+sve") + set(SVE_CPU_FEATURE "/arch:armv8.0+sve") elseif(UNIX) set(GENERIC_CPU_FEATURE "-march=armv8-a+nosimd") set(NEON_CPU_FEATURE "-march=armv8-a") - set(SVE_CPU_FEATURE "-march=armv8.2-a+sve") + set(SVE_CPU_FEATURE "-march=armv8-a+sve") endif() add_library(next_start_code_c OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/src/NextStartCodeC.cpp include) set_target_properties(next_start_code_c PROPERTIES COMPILE_FLAGS ${GENERIC_CPU_FEATURE} ) @@ -92,9 +92,9 @@ else() elseif(UNIX) set(GENERIC_CPU_FEATURE "-mtune=generic -mno-sse -mno-sse2 -mno-ssse3 -mno-sse4 -mno-sse4.2 -mno-avx \ -mno-avx2 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-bmi2") - set(SSSE3_CPU_FEATURE "-march=nehalem") - set(AVX2_CPU_FEATURE "-march=haswell") - set(AVX512_CPU_FEATURE "-march=skylake-avx512") + set(SSSE3_CPU_FEATURE "-mssse3") + set(AVX2_CPU_FEATURE "-mavx2") + set(AVX512_CPU_FEATURE "-mavx512f -mavx512bw") endif() add_library(next_start_code_c OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/src/NextStartCodeC.cpp include) if(NOT WIN32) diff --git a/vk_video_decoder/libs/NvVideoParser/src/cpudetect.cpp b/vk_video_decoder/libs/NvVideoParser/src/cpudetect.cpp index 62dee4ff..ba79e4ee 100644 --- a/vk_video_decoder/libs/NvVideoParser/src/cpudetect.cpp +++ b/vk_video_decoder/libs/NvVideoParser/src/cpudetect.cpp @@ -33,9 +33,8 @@ class InstructionSet static bool AVX(void) { return CPU_Rep.f_1_ECX_[28]; } static bool AVX2(void) { return CPU_Rep.f_7_EBX_[5]; } static bool AVX512F(void) { return CPU_Rep.f_7_EBX_[16]; } - static bool AVX512PF(void) { return CPU_Rep.f_7_EBX_[26]; } - static bool AVX512ER(void) { return CPU_Rep.f_7_EBX_[27]; } - static bool AVX512CD(void) { return CPU_Rep.f_7_EBX_[28]; } + static bool AVX512BW(void) { return CPU_Rep.f_7_EBX_[30]; } + static bool AVX512VL(void) { return CPU_Rep.f_7_EBX_[31]; } // VL isn't required private: static const InstructionSet_Internal CPU_Rep; @@ -92,11 +91,11 @@ const InstructionSet::InstructionSet_Internal InstructionSet::CPU_Rep; SIMD_ISA check_simd_support() { #if defined(_M_X64) - if (InstructionSet::AVX512F()) { return SIMD_ISA::AVX512; } + if (InstructionSet::AVX512F() && InstructionSet::AVX512BW()) { return SIMD_ISA::AVX512; } else if (InstructionSet::AVX2()) { return SIMD_ISA::AVX2; } else if (InstructionSet::SSSE3()) { return SIMD_ISA::SSSE3; }; #elif defined (__x86_64__) - if (__builtin_cpu_supports("avx512f")) { return SIMD_ISA::AVX512; } + if (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512bw")) { return SIMD_ISA::AVX512; } else if (__builtin_cpu_supports("avx2")) { return SIMD_ISA::AVX2; } else if (__builtin_cpu_supports("ssse3")) { return SIMD_ISA::SSSE3; }; #elif defined(__aarch64__)