diff --git a/go.mod b/go.mod index 232929340f..7ba38426a4 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/google/go-github v17.0.0+incompatible github.com/google/slowjam v1.1.1 github.com/karrick/godirwalk v1.16.1 - github.com/minio/highwayhash v1.0.2 + github.com/minio/highwayhash v1.0.3 github.com/moby/buildkit v0.14.1 github.com/otiai10/copy v1.14.0 github.com/pkg/errors v0.9.1 diff --git a/go.sum b/go.sum index 838e6b0ca9..3d80310a2e 100644 --- a/go.sum +++ b/go.sum @@ -342,8 +342,8 @@ github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3v github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= -github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g= -github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= +github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q= +github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= @@ -570,7 +570,6 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys 
v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/vendor/github.com/minio/highwayhash/.golangci.yml b/vendor/github.com/minio/highwayhash/.golangci.yml index 16a72da1a1..39310d0d4a 100644 --- a/vendor/github.com/minio/highwayhash/.golangci.yml +++ b/vendor/github.com/minio/highwayhash/.golangci.yml @@ -12,13 +12,11 @@ linters: - goimports - misspell - govet - - golint + - revive - ineffassign - gosimple - - deadcode - unparam - unused - - structcheck issues: exclude-use-default: false @@ -27,4 +25,4 @@ issues: - error strings should not be capitalized or end with punctuation or a newline - should have comment # TODO(aead): Remove once all exported ident. have comments! service: - golangci-lint-version: 1.20.0 # use the fixed version to not introduce new linters unexpectedly + golangci-lint-version: 1.51.2 # use the fixed version to not introduce new linters unexpectedly diff --git a/vendor/github.com/minio/highwayhash/README.md b/vendor/github.com/minio/highwayhash/README.md index 9bec7edf5d..0504822c89 100644 --- a/vendor/github.com/minio/highwayhash/README.md +++ b/vendor/github.com/minio/highwayhash/README.md @@ -42,17 +42,17 @@ So for moderately sized messages it tops out at about 15 GB/sec. 
Also for small ### ARM Performance -Below are the single core results on an EC2 m6g.4xlarge (Graviton2) instance for 256 bit outputs: +Below are the single core results on an EC2 c7g.4xlarge (Graviton3) instance for 256 bit outputs: ``` -BenchmarkSum256_16 96.82 MB/s -BenchmarkSum256_64 445.35 MB/s -BenchmarkSum256_1K 2782.46 MB/s -BenchmarkSum256_8K 4083.58 MB/s -BenchmarkSum256_1M 4986.41 MB/s -BenchmarkSum256_5M 4992.72 MB/s -BenchmarkSum256_10M 4993.32 MB/s -BenchmarkSum256_25M 4992.55 MB/s +BenchmarkSum256_16 143.66 MB/s +BenchmarkSum256_64 628.75 MB/s +BenchmarkSum256_1K 3621.71 MB/s +BenchmarkSum256_8K 5039.64 MB/s +BenchmarkSum256_1M 5279.79 MB/s +BenchmarkSum256_5M 5474.60 MB/s +BenchmarkSum256_10M 5621.73 MB/s +BenchmarkSum256_25M 5250.47 MB/s ``` ### ppc64le Performance diff --git a/vendor/github.com/minio/highwayhash/highwayhashSVE_arm64.s b/vendor/github.com/minio/highwayhash/highwayhashSVE_arm64.s new file mode 100644 index 0000000000..e9b6eb0615 --- /dev/null +++ b/vendor/github.com/minio/highwayhash/highwayhashSVE_arm64.s @@ -0,0 +1,132 @@ +// +// Copyright (c) 2024 Minio Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +//+build !noasm,!appengine + +#include "textflag.h" + +TEXT ·getVectorLength(SB), NOSPLIT, $0 + WORD $0xd2800002 // mov x2, #0 + WORD $0x04225022 // addvl x2, x2, #1 + WORD $0xd37df042 // lsl x2, x2, #3 + WORD $0xd2800003 // mov x3, #0 + WORD $0x04635023 // addpl x3, x3, #1 + WORD $0xd37df063 // lsl x3, x3, #3 + MOVD R2, vl+0(FP) + MOVD R3, pl+8(FP) + RET + +TEXT ·updateArm64Sve(SB), NOSPLIT, $0 + MOVD state+0(FP), R0 + MOVD msg_base+8(FP), R1 + MOVD msg_len+16(FP), R2 // length of message + SUBS $32, R2 + BMI completeSve + + WORD $0x2518e3e1 // ptrue p1.b + WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0] + WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL] + WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL] + WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL] + + // Load zipper merge constants table pointer + MOVD $·zipperMergeSve(SB), R3 + WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3] + WORD $0x25b8c006 // mov z6.s, #0 + WORD $0x25d8e3e2 // ptrue p2.d /* set every other lane for "s" type */ + +loopSve: + WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1] + ADD $32, R1 + + WORD $0x04e00042 // add z2.d, z2.d, z0.d + WORD $0x04e30042 // add z2.d, z2.d, z3.d + WORD $0x04e09420 // lsr z0.d, z1.d, #32 + WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s + WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d + WORD $0x04a33003 // eor z3.d, z0.d, z3.d + WORD $0x04e10081 // add z1.d, z4.d, z1.d + WORD $0x04e09440 // lsr z0.d, z2.d, #32 + WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s + WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d + WORD $0x04a43004 // eor z4.d, z0.d, z4.d + WORD $0x05253040 // tbl z0.b, z2.b, z5.b + WORD $0x04e00021 // add z1.d, z1.d, z0.d + WORD $0x05253020 // tbl z0.b, z1.b, z5.b + WORD $0x04e00042 // add z2.d, z2.d, z0.d + + SUBS $32, R2 + BPL loopSve + + WORD $0xe5e0e401 // st1d z1.d, p1, [x0] + WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL] + WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL] + WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL] + 
+completeSve: + RET + +TEXT ·updateArm64Sve2(SB), NOSPLIT, $0 + MOVD state+0(FP), R0 + MOVD msg_base+8(FP), R1 + MOVD msg_len+16(FP), R2 // length of message + SUBS $32, R2 + BMI completeSve2 + + WORD $0x2518e3e1 // ptrue p1.b + WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0] + WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL] + WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL] + WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL] + + // Load zipper merge constants table pointer + MOVD $·zipperMergeSve(SB), R3 + WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3] + +loopSve2: + WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1] + ADD $32, R1 + + WORD $0x04e00042 // add z2.d, z2.d, z0.d + WORD $0x04e30042 // add z2.d, z2.d, z3.d + WORD $0x04e09420 // lsr z0.d, z1.d, #32 + WORD $0x45c27800 // umullb z0.d, z0.s, z2.s + WORD $0x04a33003 // eor z3.d, z0.d, z3.d + WORD $0x04e10081 // add z1.d, z4.d, z1.d + WORD $0x04e09440 // lsr z0.d, z2.d, #32 + WORD $0x45c17800 // umullb z0.d, z0.s, z1.s + WORD $0x04a43004 // eor z4.d, z0.d, z4.d + WORD $0x05253040 // tbl z0.b, z2.b, z5.b + WORD $0x04e00021 // add z1.d, z1.d, z0.d + WORD $0x05253020 // tbl z0.b, z1.b, z5.b + WORD $0x04e00042 // add z2.d, z2.d, z0.d + + SUBS $32, R2 + BPL loopSve2 + + WORD $0xe5e0e401 // st1d z1.d, p1, [x0] + WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL] + WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL] + WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL] + +completeSve2: + RET + +DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03 +DATA ·zipperMergeSve+0x08(SB)/8, $0x070806090d0a040b +DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13 +DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b +GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32 diff --git a/vendor/github.com/minio/highwayhash/highwayhash_amd64.go b/vendor/github.com/minio/highwayhash/highwayhash_amd64.go index 5e64cc3b45..b7717836eb 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_amd64.go +++ 
b/vendor/github.com/minio/highwayhash/highwayhash_amd64.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a license that can be // found in the LICENSE file. +//go:build amd64 && !gccgo && !appengine && !nacl && !noasm // +build amd64,!gccgo,!appengine,!nacl,!noasm package highwayhash @@ -12,6 +13,8 @@ var ( useSSE4 = cpu.X86.HasSSE41 useAVX2 = cpu.X86.HasAVX2 useNEON = false + useSVE = false + useSVE2 = false useVMX = false ) diff --git a/vendor/github.com/minio/highwayhash/highwayhash_arm64.go b/vendor/github.com/minio/highwayhash/highwayhash_arm64.go index 27935d705e..d94e482d2d 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_arm64.go +++ b/vendor/github.com/minio/highwayhash/highwayhash_arm64.go @@ -1,24 +1,54 @@ -// Copyright (c) 2017 Minio Inc. All rights reserved. +// Copyright (c) 2017-2024 Minio Inc. All rights reserved. // Use of this source code is governed by a license that can be // found in the LICENSE file. -//+build !noasm,!appengine +//go:build !noasm && !appengine +// +build !noasm,!appengine package highwayhash +import ( + "golang.org/x/sys/cpu" +) + var ( useSSE4 = false useAVX2 = false - useNEON = true + useNEON = cpu.ARM64.HasASIMD + useSVE = cpu.ARM64.HasSVE + useSVE2 = false // cpu.ARM64.HasSVE2 -- disable until tested on real hardware useVMX = false ) +func init() { + if useSVE { + if vl, _ := getVectorLength(); vl != 256 { + // + // Since HighwayHash is designed for AVX2, + // SVE/SVE2 instructions only run correctly + // for a vector length of 256 bits + // + useSVE2 = false + useSVE = false + } + } +} + //go:noescape func initializeArm64(state *[16]uint64, key []byte) //go:noescape func updateArm64(state *[16]uint64, msg []byte) +//go:noescape +func getVectorLength() (vl, pl uint64) + +//go:noescape +func updateArm64Sve(state *[16]uint64, msg []byte) + +//go:noescape +func updateArm64Sve2(state *[16]uint64, msg []byte) + //go:noescape func finalizeArm64(out []byte, state *[16]uint64) @@ -31,7 +61,11 @@ func 
initialize(state *[16]uint64, key []byte) { } func update(state *[16]uint64, msg []byte) { - if useNEON { + if useSVE2 { + updateArm64Sve2(state, msg) + } else if useSVE { + updateArm64Sve(state, msg) + } else if useNEON { updateArm64(state, msg) } else { updateGeneric(state, msg) diff --git a/vendor/github.com/minio/highwayhash/highwayhash_generic.go b/vendor/github.com/minio/highwayhash/highwayhash_generic.go index 3909e79139..1f66e223ed 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_generic.go +++ b/vendor/github.com/minio/highwayhash/highwayhash_generic.go @@ -46,40 +46,113 @@ func initializeGeneric(state *[16]uint64, k []byte) { } func updateGeneric(state *[16]uint64, msg []byte) { - for len(msg) > 0 { - // add message - state[v1+0] += binary.LittleEndian.Uint64(msg) - state[v1+1] += binary.LittleEndian.Uint64(msg[8:]) - state[v1+2] += binary.LittleEndian.Uint64(msg[16:]) - state[v1+3] += binary.LittleEndian.Uint64(msg[24:]) - - // v1 += mul0 - state[v1+0] += state[mul0+0] - state[v1+1] += state[mul0+1] - state[v1+2] += state[mul0+2] - state[v1+3] += state[mul0+3] + for len(msg) >= 32 { + m := msg[:32] + // add message + mul0 + // Interleave operations to hide multiplication + state[v1+0] += binary.LittleEndian.Uint64(m) + state[mul0+0] state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32) - state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32) - state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32) - state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32) - - // v0 += mul1 state[v0+0] += state[mul1+0] - state[v0+1] += state[mul1+1] - state[v0+2] += state[mul1+2] - state[v0+3] += state[mul1+3] - state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32) + + state[v1+1] += binary.LittleEndian.Uint64(m[8:]) + state[mul0+1] + state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32) + state[v0+1] += state[mul1+1] state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32) + + 
state[v1+2] += binary.LittleEndian.Uint64(m[16:]) + state[mul0+2] + state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32) + state[v0+2] += state[mul1+2] state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32) + + state[v1+3] += binary.LittleEndian.Uint64(m[24:]) + state[mul0+3] + state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32) + state[v0+3] += state[mul1+3] state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32) - zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1]) - zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3]) + // inlined: zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1]) + { + val0 := state[v1+0] + val1 := state[v1+1] + res := val0 & (0xff << (2 * 8)) + res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) + res += (val1 & (0xff << (7 * 8))) >> 8 + res2 += (val0 & (0xff << (6 * 8))) >> 8 + res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 + res2 += (val1 & (0xff << (5 * 8))) >> 16 + res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 + res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 + res += (val0 & (0xff << (1 * 8))) << 32 + res2 += (val1 & 0xff) << 48 + res += val0 << 56 + res2 += (val1 & (0xff << (1 * 8))) << 24 + + state[v0+0] += res + state[v0+1] += res2 + } + // zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3]) + { + val0 := state[v1+2] + val1 := state[v1+3] + res := val0 & (0xff << (2 * 8)) + res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) + res += (val1 & (0xff << (7 * 8))) >> 8 + res2 += (val0 & (0xff << (6 * 8))) >> 8 + res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 + res2 += (val1 & (0xff << (5 * 8))) >> 16 + res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 + res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 + res += (val0 & (0xff << (1 * 8))) << 32 + res2 += (val1 & 0xff) << 
48 + res += val0 << 56 + res2 += (val1 & (0xff << (1 * 8))) << 24 + + state[v0+2] += res + state[v0+3] += res2 + } + + // inlined: zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1]) + { + val0 := state[v0+0] + val1 := state[v0+1] + res := val0 & (0xff << (2 * 8)) + res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) + res += (val1 & (0xff << (7 * 8))) >> 8 + res2 += (val0 & (0xff << (6 * 8))) >> 8 + res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 + res2 += (val1 & (0xff << (5 * 8))) >> 16 + res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 + res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 + res += (val0 & (0xff << (1 * 8))) << 32 + res2 += (val1 & 0xff) << 48 + res += val0 << 56 + res2 += (val1 & (0xff << (1 * 8))) << 24 + + state[v1+0] += res + state[v1+1] += res2 + } + + //inlined: zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3]) + { + val0 := state[v0+2] + val1 := state[v0+3] + res := val0 & (0xff << (2 * 8)) + res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) + res += (val1 & (0xff << (7 * 8))) >> 8 + res2 += (val0 & (0xff << (6 * 8))) >> 8 + res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 + res2 += (val1 & (0xff << (5 * 8))) >> 16 + res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 + res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 + res += (val0 & (0xff << (1 * 8))) << 32 + res2 += (val1 & 0xff) << 48 + res += val0 << 56 + res2 += (val1 & (0xff << (1 * 8))) << 24 - zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1]) - zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3]) + state[v1+2] += res + state[v1+3] += res2 + } msg = msg[32:] } } @@ -124,25 +197,129 @@ func finalizeGeneric(out []byte, state *[16]uint64) { } } +// Experiments on variations left for future reference... 
+/* func zipperMerge(v0, v1 uint64, d0, d1 *uint64) { - m0 := v0 & (0xFF << (2 * 8)) - m1 := (v1 & (0xFF << (7 * 8))) >> 8 - m2 := ((v0 & (0xFF << (5 * 8))) + (v1 & (0xFF << (6 * 8)))) >> 16 - m3 := ((v0 & (0xFF << (3 * 8))) + (v1 & (0xFF << (4 * 8)))) >> 24 - m4 := (v0 & (0xFF << (1 * 8))) << 32 - m5 := v0 << 56 - - *d0 += m0 + m1 + m2 + m3 + m4 + m5 - - m0 = (v0 & (0xFF << (7 * 8))) + (v1 & (0xFF << (2 * 8))) - m1 = (v0 & (0xFF << (6 * 8))) >> 8 - m2 = (v1 & (0xFF << (5 * 8))) >> 16 - m3 = ((v1 & (0xFF << (3 * 8))) + (v0 & (0xFF << (4 * 8)))) >> 24 - m4 = (v1 & 0xFF) << 48 - m5 = (v1 & (0xFF << (1 * 8))) << 24 - - *d1 += m3 + m2 + m5 + m1 + m4 + m0 + if true { + // fastest. original interleaved... + res := v0 & (0xff << (2 * 8)) + res2 := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8))) + res += (v1 & (0xff << (7 * 8))) >> 8 + res2 += (v0 & (0xff << (6 * 8))) >> 8 + res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16 + res2 += (v1 & (0xff << (5 * 8))) >> 16 + res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24 + res2 += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24 + res += (v0 & (0xff << (1 * 8))) << 32 + res2 += (v1 & 0xff) << 48 + res += v0 << 56 + res2 += (v1 & (0xff << (1 * 8))) << 24 + + *d0 += res + *d1 += res2 + } else if false { + // Reading bytes and combining into uint64 + var v0b [8]byte + binary.LittleEndian.PutUint64(v0b[:], v0) + var v1b [8]byte + binary.LittleEndian.PutUint64(v1b[:], v1) + var res, res2 uint64 + + res = uint64(v0b[0]) << (7 * 8) + res2 = uint64(v1b[0]) << (6 * 8) + res |= uint64(v0b[1]) << (5 * 8) + res2 |= uint64(v1b[1]) << (4 * 8) + res |= uint64(v0b[2]) << (2 * 8) + res2 |= uint64(v1b[2]) << (2 * 8) + res |= uint64(v0b[3]) + res2 |= uint64(v0b[4]) << (1 * 8) + res |= uint64(v0b[5]) << (3 * 8) + res2 |= uint64(v0b[6]) << (5 * 8) + res |= uint64(v1b[4]) << (1 * 8) + res2 |= uint64(v0b[7]) << (7 * 8) + res |= uint64(v1b[6]) << (4 * 8) + res2 |= uint64(v1b[3]) + res |= uint64(v1b[7]) 
<< (6 * 8) + res2 |= uint64(v1b[5]) << (3 * 8) + + *d0 += res + *d1 += res2 + + } else if false { + // bytes to bytes shuffle + var v0b [8]byte + binary.LittleEndian.PutUint64(v0b[:], v0) + var v1b [8]byte + binary.LittleEndian.PutUint64(v1b[:], v1) + var res [8]byte + + //res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24 + res[0] = v0b[3] + res[1] = v1b[4] + + // res := v0 & (0xff << (2 * 8)) + res[2] = v0b[2] + + //res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16 + res[3] = v0b[5] + res[4] = v1b[6] + + //res += (v0 & (0xff << (1 * 8))) << 32 + res[5] = v0b[1] + + //res += (v1 & (0xff << (7 * 8))) >> 8 + res[6] += v1b[7] + + //res += v0 << 56 + res[7] = v0b[0] + v0 = binary.LittleEndian.Uint64(res[:]) + *d0 += v0 + + //res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24 + res[0] = v1b[3] + res[1] = v0b[4] + + res[2] = v1b[2] + + // res += (v1 & (0xff << (5 * 8))) >> 16 + res[3] = v1b[5] + + //res += (v1 & (0xff << (1 * 8))) << 24 + res[4] = v1b[1] + + // res += (v0 & (0xff << (6 * 8))) >> 8 + res[5] = v0b[6] + + //res := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8))) + res[7] = v0b[7] + + //res += (v1 & 0xff) << 48 + res[6] = v1b[0] + + v0 = binary.LittleEndian.Uint64(res[:]) + *d1 += v0 + } else { + // original. 
+ res := v0 & (0xff << (2 * 8)) + res += (v1 & (0xff << (7 * 8))) >> 8 + res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16 + res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24 + res += (v0 & (0xff << (1 * 8))) << 32 + res += v0 << 56 + + *d0 += res + + res = (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8))) + res += (v0 & (0xff << (6 * 8))) >> 8 + res += (v1 & (0xff << (5 * 8))) >> 16 + res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24 + res += (v1 & 0xff) << 48 + res += (v1 & (0xff << (1 * 8))) << 24 + + *d1 += res + } } +*/ // reduce v = [v0, v1, v2, v3] mod the irreducible polynomial x^128 + x^2 + x func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) { diff --git a/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go b/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go index a988c74e60..cf9ee1a262 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go +++ b/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go @@ -2,7 +2,8 @@ // Use of this source code is governed by a license that can be // found in the LICENSE file. -//+build !noasm,!appengine +//go:build !noasm && !appengine +// +build !noasm,!appengine package highwayhash @@ -10,6 +11,8 @@ var ( useSSE4 = false useAVX2 = false useNEON = false + useSVE = false + useSVE2 = false useVMX = true ) diff --git a/vendor/github.com/minio/highwayhash/highwayhash_ref.go b/vendor/github.com/minio/highwayhash/highwayhash_ref.go index e70a94779b..42cbbb4c44 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_ref.go +++ b/vendor/github.com/minio/highwayhash/highwayhash_ref.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a license that can be // found in the LICENSE file. 
+//go:build noasm || (!amd64 && !arm64 && !ppc64le) // +build noasm !amd64,!arm64,!ppc64le package highwayhash @@ -10,6 +11,8 @@ var ( useSSE4 = false useAVX2 = false useNEON = false + useSVE = false + useSVE2 = false useVMX = false ) diff --git a/vendor/modules.txt b/vendor/modules.txt index def823f3e5..3d64ec47b2 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -842,7 +842,7 @@ github.com/magiconair/properties # github.com/matttproud/golang_protobuf_extensions v1.0.4 ## explicit; go 1.9 github.com/matttproud/golang_protobuf_extensions/pbutil -# github.com/minio/highwayhash v1.0.2 +# github.com/minio/highwayhash v1.0.3 ## explicit; go 1.15 github.com/minio/highwayhash # github.com/mitchellh/go-homedir v1.1.0