Skip to content

Commit

Permalink
Merge branch 'master' into arm-half
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui authored Mar 1, 2024
2 parents dce0f33 + a753c21 commit 2e7b7fb
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@ avx512ifma = 0
avx512vbmi = 0
avx512vbmi2 = 0
avx512fp16 = 0
avx512er = 0
avx5124fmaps = 0
avx5124vnniw = 0
avxvnni = 0
avxvnniint8 = 0
avxifma = 0
Expand Down Expand Up @@ -394,6 +397,9 @@ avx512ifma = 0
avx512vbmi = 0
avx512vbmi2 = 0
avx512fp16 = 0
avx512er = 0
avx5124fmaps = 0
avx5124vnniw = 0
avxvnni = 0
avxvnniint8 = 0
avxifma = 0
Expand Down Expand Up @@ -450,6 +456,9 @@ avx512ifma = 0
avx512vbmi = 0
avx512vbmi2 = 0
avx512fp16 = 0
avx512er = 0
avx5124fmaps = 0
avx5124vnniw = 0
avxvnni = 0
avxvnniint8 = 0
avxifma = 0
Expand Down Expand Up @@ -486,6 +495,9 @@ avx512ifma = 0
avx512vbmi = 0
avx512vbmi2 = 0
avx512fp16 = 0
avx512er = 0
avx5124fmaps = 0
avx5124vnniw = 0
avxvnni = 0
avxvnniint8 = 0
avxifma = 0
Expand Down Expand Up @@ -513,7 +525,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._

|CPU|ISA|
|:---:|---|
|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avxvnni` `avxvnniint8` `avxifma`|
|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxifma`|
|arm|`half` `edsp` `neon` `vfpv4` `idiv`|
|aarch64|`neon` `vfpv4` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `mte` `sve` `sve2` `svebf16` `svei8mm` `svef32mm` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `amx`|
|mips|`msa`|
Expand Down
3 changes: 3 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ int main()
PRINT_ISA_SUPPORT(avx512vbmi)
PRINT_ISA_SUPPORT(avx512vbmi2)
PRINT_ISA_SUPPORT(avx512fp16)
PRINT_ISA_SUPPORT(avx512er)
PRINT_ISA_SUPPORT(avx5124fmaps)
PRINT_ISA_SUPPORT(avx5124vnniw)
PRINT_ISA_SUPPORT(avxvnni)
PRINT_ISA_SUPPORT(avxvnniint8)
PRINT_ISA_SUPPORT(avxifma)
Expand Down
10 changes: 10 additions & 0 deletions ruapu.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,13 @@ RUAPU_INSTCODE(avx512ifma, 0x62, 0xf2, 0xfd, 0x48, 0xb4, 0xc0) // vpmadd52luq zm
RUAPU_INSTCODE(avx512vbmi, 0x62, 0xf2, 0x7d, 0x48, 0x75, 0xc0) // vpermi2b zmm0,zmm0,zmm0
RUAPU_INSTCODE(avx512vbmi2, 0x62, 0xf2, 0x7d, 0x48, 0x71, 0xc0) // vpshldvd zmm0,zmm0,zmm0
RUAPU_INSTCODE(avx512fp16, 0x62, 0xf6, 0x7d, 0x48, 0x98, 0xc0) // vfmadd132ph zmm0,zmm0,zmm0
// TODO:avx512pf, vgatherpf1dps DWORD PTR [esp+zmm0*1]{k1}
RUAPU_INSTCODE(avx512er, 0x62, 0xf2, 0xfd, 0x48, 0xc8, 0xc0) //vexp2pd zmm0,zmm0
RUAPU_INSTCODE(avx5124fmaps, 0x67, 0x62, 0xf2, 0x7f, 0x48, 0x9a, 0x04, 0x24) //v4fmaddps zmm0,zmm0,XMMWORD PTR [esp]
RUAPU_INSTCODE(avx5124vnniw, 0x67, 0x62, 0xf2, 0x7f, 0x48, 0x52, 0x04, 0x24) //vp4dpwssd zmm0,zmm0,XMMWORD PTR [esp]
RUAPU_INSTCODE(avxvnni, 0xc4, 0xe2, 0x7d, 0x52, 0xc0) // vpdpwssd ymm0,ymm0,ymm0
RUAPU_INSTCODE(avxvnniint8, 0xc4, 0xe2, 0x7f, 0x50, 0xc0) // vpdpbssd ymm0,ymm0,ymm0
// TODO:avxvnniint16, vpdpwusd xmm,xmm,xmm
RUAPU_INSTCODE(avxifma, 0xc4, 0xe2, 0xfd, 0xb4, 0xc0) // vpmadd52luq ymm0,ymm0,ymm0

#elif __aarch64__ || defined(_M_ARM64)
Expand Down Expand Up @@ -318,8 +323,13 @@ RUAPU_ISAENTRY(avx512ifma)
RUAPU_ISAENTRY(avx512vbmi)
RUAPU_ISAENTRY(avx512vbmi2)
RUAPU_ISAENTRY(avx512fp16)
// TODO:avx512pf
RUAPU_ISAENTRY(avx512er)
RUAPU_ISAENTRY(avx5124fmaps)
RUAPU_ISAENTRY(avx5124vnniw)
RUAPU_ISAENTRY(avxvnni)
RUAPU_ISAENTRY(avxvnniint8)
// TODO:avxvnniint16
RUAPU_ISAENTRY(avxifma)

#elif __aarch64__ || defined(_M_ARM64)
Expand Down

0 comments on commit 2e7b7fb

Please sign in to comment.