From 63a77795e130e71e0b165fe0a30cb362487fb47b Mon Sep 17 00:00:00 2001 From: Dave Enyeart Date: Tue, 25 Jan 2022 03:04:58 -0500 Subject: [PATCH] Bump Go to 1.17.5 (release-2.4) (#3182) * Go 1.17 prep - ignore vendor directory in staticcheck (#3114) A few vendored dependencies have benign staticcheck errors when using Go 1.17. This commit simply filters out vendor directories in staticcheck output since the intention of the linter is to check Fabric source only. Note - it is not possible to filter the vendor directories as input to staticcheck, as they will still show up when the Fabric package that uses them gets checked. Have to filter the output instead. Signed-off-by: David Enyeart * Bump Go to 1.17.5 Bump Go to 1.17.5. Fix unit tests to work with Go 1.17.5. Update dependencies to work with Go 1.17. Signed-off-by: David Enyeart * Use upgraded deps Signed-off-by: Alessandro Sorniotti Co-authored-by: Alessandro Sorniotti --- Makefile | 2 +- ci/azure-pipelines-merge.yml | 2 +- ci/azure-pipelines-release.yml | 2 +- ci/azure-pipelines.yml | 2 +- cmd/common/signer/signer_test.go | 2 +- common/crypto/expiration_test.go | 4 +- docs/source/dev-setup/devenv.rst | 2 +- docs/source/prereqs.md | 4 +- go.mod | 4 +- go.sum | 28 +- scripts/golinter.sh | 3 +- vagrant/golang.sh | 2 +- vendor/github.com/IBM/idemix/go.mod | 2 +- vendor/github.com/IBM/idemix/go.sum | 21 +- vendor/github.com/IBM/mathlib/go.mod | 4 +- vendor/github.com/IBM/mathlib/go.sum | 24 +- .../github.com/cespare/xxhash/v2/.travis.yml | 8 - vendor/github.com/cespare/xxhash/v2/README.md | 6 +- vendor/github.com/cespare/xxhash/v2/xxhash.go | 1 - .../cespare/xxhash/v2/xxhash_amd64.s | 62 +- .../cespare/xxhash/v2/xxhash_unsafe.go | 53 +- .../consensys/gnark-crypto/ecc/bn254/bn254.go | 10 +- .../consensys/gnark-crypto/ecc/bn254/doc.go | 18 + .../gnark-crypto/ecc/bn254/fp/asm.go | 1 + .../gnark-crypto/ecc/bn254/fp/asm_noadx.go | 1 + .../gnark-crypto/ecc/bn254/fp/doc.go | 43 + .../gnark-crypto/ecc/bn254/fp/element.go | 
895 ++++-- .../gnark-crypto/ecc/bn254/fp/element_exp.go | 802 +++++ .../gnark-crypto/ecc/bn254/fp/element_fuzz.go | 136 + .../ecc/bn254/fp/element_mul_adx_amd64.s | 100 +- .../ecc/bn254/fp/element_mul_amd64.s | 100 +- .../ecc/bn254/fp/element_ops_amd64.go | 6 + .../ecc/bn254/fp/element_ops_amd64.s | 105 + .../ecc/bn254/fp/element_ops_noasm.go | 13 + .../gnark-crypto/ecc/bn254/fr/asm.go | 1 + .../gnark-crypto/ecc/bn254/fr/asm_noadx.go | 1 + .../gnark-crypto/ecc/bn254/fr/doc.go | 43 + .../gnark-crypto/ecc/bn254/fr/element.go | 901 ++++-- .../gnark-crypto/ecc/bn254/fr/element_exp.go | 819 +++++ .../gnark-crypto/ecc/bn254/fr/element_fuzz.go | 136 + .../ecc/bn254/fr/element_mul_adx_amd64.s | 100 +- .../ecc/bn254/fr/element_mul_amd64.s | 100 +- .../ecc/bn254/fr/element_ops_amd64.go | 6 + .../ecc/bn254/fr/element_ops_amd64.s | 105 + .../ecc/bn254/fr/element_ops_noasm.go | 13 + .../gnark-crypto/ecc/bn254/fr/mimc/doc.go | 18 + .../gnark-crypto/ecc/bn254/fr/mimc/fuzz.go | 34 + .../gnark-crypto/ecc/bn254/fr/mimc/mimc.go | 174 + .../consensys/gnark-crypto/ecc/bn254/fuzz.go | 76 + .../consensys/gnark-crypto/ecc/bn254/g1.go | 45 +- .../consensys/gnark-crypto/ecc/bn254/g2.go | 66 +- .../ecc/bn254/internal/fptower/asm.go | 1 + .../ecc/bn254/internal/fptower/asm_noadx.go | 1 + .../ecc/bn254/internal/fptower/e12.go | 203 +- .../ecc/bn254/internal/fptower/e12_pairing.go | 278 +- .../ecc/bn254/internal/fptower/e2.go | 40 + .../ecc/bn254/internal/fptower/e2_adx_amd64.s | 8 +- .../ecc/bn254/internal/fptower/e2_amd64.s | 8 +- .../internal/fptower/e2_bn254_fallback.go | 1 + .../ecc/bn254/internal/fptower/e2_fallback.go | 1 + .../ecc/bn254/internal/fptower/e6.go | 64 + .../gnark-crypto/ecc/bn254/marshal.go | 254 +- .../gnark-crypto/ecc/bn254/multiexp.go | 2815 ++++++++++------- .../gnark-crypto/ecc/bn254/pairing.go | 22 +- .../consensys/gnark-crypto/ecc/ecc.go | 82 +- .../consensys/gnark-crypto/ecc/ecc.md | 5 +- .../consensys/gnark-crypto/ecc/utils.go | 17 + 
.../consensys/gnark-crypto/field/field.go | 305 ++ .../consensys/gnark-crypto/field/field.md | 48 + .../field/internal/addchain/addchain.go | 327 ++ .../internal/generator/config/bls12-377.go | 29 + .../internal/generator/config/bls12-381.go | 29 + .../internal/generator/config/bls24-315.go | 29 + .../internal/generator/config/bn254.go | 28 + .../internal/generator/config/bw6-633.go | 28 + .../internal/generator/config/bw6-761.go | 28 + .../internal/generator/config/curve.go | 71 + .../mmcloughlin/addchain/.gitignore | 1 + .../mmcloughlin/addchain/.golangci.yml | 39 + .../mmcloughlin/addchain/.goreleaser.yml | 28 + .../mmcloughlin/addchain/.zenodo.json | 50 + .../mmcloughlin/addchain/CITATION.bib | 11 + .../mmcloughlin/addchain/CITATION.cff | 19 + .../github.com/mmcloughlin/addchain/LICENSE | 29 + .../github.com/mmcloughlin/addchain/README.md | 402 +++ .../mmcloughlin/addchain/acc/acc.go | 91 + .../mmcloughlin/addchain/acc/ast/ast.go | 71 + .../mmcloughlin/addchain/acc/ast/print.go | 101 + .../mmcloughlin/addchain/acc/build.go | 158 + .../mmcloughlin/addchain/acc/decompile.go | 58 + .../mmcloughlin/addchain/acc/ir/ir.go | 193 ++ .../mmcloughlin/addchain/acc/parse/acc.peg | 131 + .../acc/parse/internal/parser/zparser.go | 2203 +++++++++++++ .../mmcloughlin/addchain/acc/parse/parse.go | 35 + .../mmcloughlin/addchain/acc/pass/alloc.go | 98 + .../mmcloughlin/addchain/acc/pass/eval.go | 57 + .../mmcloughlin/addchain/acc/pass/naming.go | 89 + .../mmcloughlin/addchain/acc/pass/pass.go | 117 + .../addchain/acc/pass/validation.go | 28 + .../addchain/acc/printer/printer.go | 131 + .../mmcloughlin/addchain/acc/translate.go | 139 + .../mmcloughlin/addchain/alg/alg.go | 43 + .../addchain/alg/contfrac/contfrac.go | 262 ++ .../mmcloughlin/addchain/alg/dict/dict.go | 434 +++ .../mmcloughlin/addchain/alg/dict/runs.go | 108 + .../addchain/alg/ensemble/ensemble.go | 71 + .../mmcloughlin/addchain/alg/exec/exec.go | 99 + .../addchain/alg/heuristic/heuristic.go | 234 ++ 
.../mmcloughlin/addchain/alg/opt/opt.go | 101 + .../github.com/mmcloughlin/addchain/chain.go | 194 ++ .../mmcloughlin/addchain/codecov.yml | 4 + vendor/github.com/mmcloughlin/addchain/go.mod | 8 + vendor/github.com/mmcloughlin/addchain/go.sum | 4 + .../mmcloughlin/addchain/install.sh | 378 +++ .../addchain/internal/bigint/bigint.go | 169 + .../addchain/internal/bigints/bigints.go | 104 + .../addchain/internal/bigvector/bigvector.go | 86 + .../addchain/internal/container/heap/heap.go | 55 + .../addchain/internal/errutil/errutil.go | 28 + .../addchain/internal/print/printer.go | 104 + .../github.com/mmcloughlin/addchain/logo.svg | 5 + .../mmcloughlin/addchain/meta/cite.go | 61 + .../mmcloughlin/addchain/meta/meta.go | 101 + .../mmcloughlin/addchain/meta/vars.go | 10 + .../mmcloughlin/addchain/program.go | 133 + vendor/golang.org/x/sys/unix/ioctl_linux.go | 196 ++ vendor/golang.org/x/sys/unix/mkerrors.sh | 7 +- vendor/golang.org/x/sys/unix/syscall_linux.go | 176 +- .../x/sys/unix/zerrors_freebsd_arm.go | 9 + .../x/sys/unix/zerrors_solaris_amd64.go | 3 + .../x/sys/unix/zerrors_zos_s390x.go | 7 + vendor/golang.org/x/sys/unix/ztypes_linux.go | 15 + .../x/sys/windows/security_windows.go | 16 +- vendor/modules.txt | 38 +- 134 files changed, 15104 insertions(+), 2266 deletions(-) delete mode 100644 vendor/github.com/cespare/xxhash/v2/.travis.yml create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/doc.go create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/doc.go create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_exp.go create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_fuzz.go create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/doc.go create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_exp.go create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_fuzz.go create mode 100644 
vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/doc.go create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/fuzz.go create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/mimc.go create mode 100644 vendor/github.com/consensys/gnark-crypto/ecc/bn254/fuzz.go create mode 100644 vendor/github.com/consensys/gnark-crypto/field/field.go create mode 100644 vendor/github.com/consensys/gnark-crypto/field/field.md create mode 100644 vendor/github.com/consensys/gnark-crypto/field/internal/addchain/addchain.go create mode 100644 vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls12-377.go create mode 100644 vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls12-381.go create mode 100644 vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls24-315.go create mode 100644 vendor/github.com/consensys/gnark-crypto/internal/generator/config/bn254.go create mode 100644 vendor/github.com/consensys/gnark-crypto/internal/generator/config/bw6-633.go create mode 100644 vendor/github.com/consensys/gnark-crypto/internal/generator/config/bw6-761.go create mode 100644 vendor/github.com/consensys/gnark-crypto/internal/generator/config/curve.go create mode 100644 vendor/github.com/mmcloughlin/addchain/.gitignore create mode 100644 vendor/github.com/mmcloughlin/addchain/.golangci.yml create mode 100644 vendor/github.com/mmcloughlin/addchain/.goreleaser.yml create mode 100644 vendor/github.com/mmcloughlin/addchain/.zenodo.json create mode 100644 vendor/github.com/mmcloughlin/addchain/CITATION.bib create mode 100644 vendor/github.com/mmcloughlin/addchain/CITATION.cff create mode 100644 vendor/github.com/mmcloughlin/addchain/LICENSE create mode 100644 vendor/github.com/mmcloughlin/addchain/README.md create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/acc.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/ast/ast.go create mode 100644 
vendor/github.com/mmcloughlin/addchain/acc/ast/print.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/build.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/decompile.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/ir/ir.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/parse/acc.peg create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/parse/internal/parser/zparser.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/parse/parse.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/pass/alloc.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/pass/eval.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/pass/naming.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/pass/pass.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/pass/validation.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/printer/printer.go create mode 100644 vendor/github.com/mmcloughlin/addchain/acc/translate.go create mode 100644 vendor/github.com/mmcloughlin/addchain/alg/alg.go create mode 100644 vendor/github.com/mmcloughlin/addchain/alg/contfrac/contfrac.go create mode 100644 vendor/github.com/mmcloughlin/addchain/alg/dict/dict.go create mode 100644 vendor/github.com/mmcloughlin/addchain/alg/dict/runs.go create mode 100644 vendor/github.com/mmcloughlin/addchain/alg/ensemble/ensemble.go create mode 100644 vendor/github.com/mmcloughlin/addchain/alg/exec/exec.go create mode 100644 vendor/github.com/mmcloughlin/addchain/alg/heuristic/heuristic.go create mode 100644 vendor/github.com/mmcloughlin/addchain/alg/opt/opt.go create mode 100644 vendor/github.com/mmcloughlin/addchain/chain.go create mode 100644 vendor/github.com/mmcloughlin/addchain/codecov.yml create mode 100644 vendor/github.com/mmcloughlin/addchain/go.mod create mode 100644 vendor/github.com/mmcloughlin/addchain/go.sum create mode 100644 
vendor/github.com/mmcloughlin/addchain/install.sh create mode 100644 vendor/github.com/mmcloughlin/addchain/internal/bigint/bigint.go create mode 100644 vendor/github.com/mmcloughlin/addchain/internal/bigints/bigints.go create mode 100644 vendor/github.com/mmcloughlin/addchain/internal/bigvector/bigvector.go create mode 100644 vendor/github.com/mmcloughlin/addchain/internal/container/heap/heap.go create mode 100644 vendor/github.com/mmcloughlin/addchain/internal/errutil/errutil.go create mode 100644 vendor/github.com/mmcloughlin/addchain/internal/print/printer.go create mode 100644 vendor/github.com/mmcloughlin/addchain/logo.svg create mode 100644 vendor/github.com/mmcloughlin/addchain/meta/cite.go create mode 100644 vendor/github.com/mmcloughlin/addchain/meta/meta.go create mode 100644 vendor/github.com/mmcloughlin/addchain/meta/vars.go create mode 100644 vendor/github.com/mmcloughlin/addchain/program.go create mode 100644 vendor/golang.org/x/sys/unix/ioctl_linux.go diff --git a/Makefile b/Makefile index ee053db8a40..1e653a461b6 100644 --- a/Makefile +++ b/Makefile @@ -78,7 +78,7 @@ METADATA_VAR += CommitSHA=$(EXTRA_VERSION) METADATA_VAR += BaseDockerLabel=$(BASE_DOCKER_LABEL) METADATA_VAR += DockerNamespace=$(DOCKER_NS) -GO_VER = 1.16.7 +GO_VER = 1.17.5 GO_TAGS ?= RELEASE_EXES = orderer $(TOOLS_EXES) diff --git a/ci/azure-pipelines-merge.yml b/ci/azure-pipelines-merge.yml index 760af5e3a05..40312a06b90 100644 --- a/ci/azure-pipelines-merge.yml +++ b/ci/azure-pipelines-merge.yml @@ -11,7 +11,7 @@ pr: none variables: GOPATH: $(Agent.BuildDirectory)/go PATH: $(Agent.BuildDirectory)/go/bin:/usr/local/go/bin:/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin - GOVER: 1.16.7 + GOVER: 1.17.5 jobs: - job: UnitTests diff --git a/ci/azure-pipelines-release.yml b/ci/azure-pipelines-release.yml index 001e99e8d9d..eafe4e0fad9 100644 --- a/ci/azure-pipelines-release.yml +++ b/ci/azure-pipelines-release.yml @@ -11,7 +11,7 @@ variables: - name: GOPATH value: 
$(Agent.BuildDirectory)/go - name: GOVER - value: 1.16.7 + value: 1.17.5 stages: - stage: BuildBinaries diff --git a/ci/azure-pipelines.yml b/ci/azure-pipelines.yml index 424258e0c43..1c9fce18248 100644 --- a/ci/azure-pipelines.yml +++ b/ci/azure-pipelines.yml @@ -11,7 +11,7 @@ pr: variables: GOPATH: $(Agent.BuildDirectory)/go PATH: $(Agent.BuildDirectory)/go/bin:/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin - GOVER: 1.16.7 + GOVER: 1.17.5 stages: - stage: VerifyBuild diff --git a/cmd/common/signer/signer_test.go b/cmd/common/signer/signer_test.go index 16a3ace43e8..5da1ec1d535 100644 --- a/cmd/common/signer/signer_test.go +++ b/cmd/common/signer/signer_test.go @@ -159,6 +159,6 @@ func TestSignerBadConfig(t *testing.T) { } signer, err = NewSigner(conf) - require.EqualError(t, err, "enrollment certificate is not a valid x509 certificate: asn1: syntax error: data truncated") + require.EqualError(t, err, "enrollment certificate is not a valid x509 certificate: x509: malformed certificate") require.Nil(t, signer) } diff --git a/common/crypto/expiration_test.go b/common/crypto/expiration_test.go index 67c6e113c1a..6284bc11baf 100644 --- a/common/crypto/expiration_test.go +++ b/common/crypto/expiration_test.go @@ -275,14 +275,14 @@ cRv6rqxvy5M+t0DhRtiwCen70YCUsksb }{ { description: "Bad first certificate", - errContains: "asn1:", + errContains: "malformed certificate", first: []byte{1, 2, 3}, second: bob, }, { description: "Bad second certificate", - errContains: "asn1:", + errContains: "malformed certificate", first: alice, second: []byte{1, 2, 3}, }, diff --git a/docs/source/dev-setup/devenv.rst b/docs/source/dev-setup/devenv.rst index 45a5dcc4d75..cb8d0b372c2 100644 --- a/docs/source/dev-setup/devenv.rst +++ b/docs/source/dev-setup/devenv.rst @@ -5,7 +5,7 @@ Prerequisites ~~~~~~~~~~~~~ - `Git client `__ -- `Go `__ version 1.16.x (recommended Go version can be found in project Makefile) +- `Go `__ version 1.17.x (recommended Go version can be found in 
project Makefile) - `Docker `__ version 18.03 or later - (macOS) `Xcode Command Line Tools `__ - `SoftHSM `__ use version 2.5 as 2.6 is broken in this environment diff --git a/docs/source/prereqs.md b/docs/source/prereqs.md index 3a891f5b455..5f52ac2ef4c 100644 --- a/docs/source/prereqs.md +++ b/docs/source/prereqs.md @@ -82,9 +82,9 @@ Optional: Install the latest Fabric supported version of [Go](https://golang.org installed (only required if you will be writing Go chaincode or SDK applications). ```shell -$ brew install go@1.16.7 +$ brew install go@1.17.5 $ go version -go1.16.7 darwin/amd64 +go1.17.5 darwin/amd64 ``` ### JQ diff --git a/go.mod b/go.mod index 483375653b6..e6419e2a4a6 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.14 require ( code.cloudfoundry.org/clock v1.0.0 github.com/DataDog/zstd v1.4.5 // indirect - github.com/IBM/idemix v0.0.0-20210930104432-e4a1410f5353 + github.com/IBM/idemix v0.0.0-20220112103229-701e7610d405 github.com/Knetic/govaluate v3.0.0+incompatible github.com/Shopify/sarama v1.20.1 github.com/Shopify/toxiproxy v2.1.4+incompatible // indirect @@ -69,3 +69,5 @@ require ( ) replace github.com/onsi/gomega => github.com/onsi/gomega v1.9.0 + +replace github.com/cespare/xxhash/v2 => github.com/cespare/xxhash/v2 v2.1.2 // fix for Go 1.17 in github.com/prometheus/client_golang dependency without updating protobuf diff --git a/go.sum b/go.sum index db8d2eba94c..780f3eb0a22 100644 --- a/go.sum +++ b/go.sum @@ -8,10 +8,10 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ= github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= -github.com/IBM/idemix v0.0.0-20210930104432-e4a1410f5353 h1:gOG+V3F5J7gsWzHAoNSHAiYV2o3OvgM7uTrF7BUSt2Y= -github.com/IBM/idemix v0.0.0-20210930104432-e4a1410f5353/go.mod 
h1:Fazy7pMxGGdXRRSFgTipzH4Q02bIEPatJa3km9H3w78= -github.com/IBM/mathlib v0.0.0-20210928081244-f5486459a290 h1:usgCPts8YnOT6ba6CQLPzQ5Yb1crnQ8iU132Zm679IM= -github.com/IBM/mathlib v0.0.0-20210928081244-f5486459a290/go.mod h1:grSmaMdY3LbW9QwqMrzuTUCHjES4rzT4Dm7q6yIL9vs= +github.com/IBM/idemix v0.0.0-20220112103229-701e7610d405 h1:7cKDQL0CWDXO9acHJCCc7SUYpMcJ9H9NGVjLvwG66Nc= +github.com/IBM/idemix v0.0.0-20220112103229-701e7610d405/go.mod h1:tBeRCKH37b2OkQRJVomLoYk8OjIMYQm+oRWFiJF0jQI= +github.com/IBM/mathlib v0.0.0-20220112091634-0a7378db6912 h1:rySf+WTiafw7zS7P8GUcZGDd2nDTgLDUx51aIcCFuX4= +github.com/IBM/mathlib v0.0.0-20220112091634-0a7378db6912/go.mod h1:WZGhleRZVSAg25iKkiWXHacTkui2CY1cyJMBOgpQwh8= github.com/Knetic/govaluate v3.0.0+incompatible h1:7o6+MAPhYTCF0+fdvoz1xDedhRb4f6s9Tn1Tt7/WTEg= github.com/Knetic/govaluate v3.0.0+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/Microsoft/go-winio v0.4.15-0.20200908182639-5b44b70ab3ab/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw= @@ -42,14 +42,14 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= -github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= +github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmEg9bt0VpxxWqJlO4iwu3FBdHUzV7wQVg= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/consensys/bavard v0.1.8-0.20210329205436-c3e862ba4e5f/go.mod h1:Bpd0/3mZuaj6Sj+PqrmIquiOKy397AKGThQPaGzNXAQ= -github.com/consensys/gnark-crypto v0.4.0 h1:KHf7Ta876Ys6L8+i0DLRRKOAa3PfJ8oobAX1CEeIa4A= -github.com/consensys/gnark-crypto v0.4.0/go.mod h1:wK/gpXP9B06qTzTVML71GhKD1ygP9xOzukbI68NJqsQ= +github.com/consensys/bavard v0.1.8-0.20210915155054-088da2f7f54a/go.mod h1:9ItSMtA/dXMAiL7BG6bqW2m3NdSEObYWoH223nGHukI= +github.com/consensys/gnark-crypto v0.6.0 h1:K48rcIJaX2YkQT2k51EiHIxTynpHsOLHF1FVV+0aS7w= +github.com/consensys/gnark-crypto v0.6.0/go.mod h1:PicAZJP763+7N9LZFfj+MquTXq98pwjD6l8Ry8WdHSU= github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59 h1:qWj4qVYZ95vLWwqyNJCQg7rDsG5wPdze0UaPolH7DUk= github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59/go.mod h1:pA0z1pT8KYB3TCXK/ocprsh7MAkoW8bZVzPdih9snmM= github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= @@ -141,6 +141,7 @@ github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/handlers v1.4.0 h1:XulKRWSQK5uChr4pEgSE4Tc/OcmnU9GJuSwdog/tZsA= github.com/gorilla/handlers v1.4.0/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= @@ -213,6 +214,9 @@ github.com/miekg/pkcs11 v1.0.3 h1:iMwmD7I5225wv84WxIG/bmxz9AXjWvTWIbM/TYHvWtw= github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= 
github.com/mitchellh/mapstructure v1.3.2 h1:mRS76wmkOn3KkKAyXDu42V+6ebnXWIztFSYGN7GeoRg= github.com/mitchellh/mapstructure v1.3.2/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/mmcloughlin/addchain v0.4.0 h1:SobOdjm2xLj1KkXN5/n0xTIWyZA2+s99UCY1iPfkHRY= +github.com/mmcloughlin/addchain v0.4.0/go.mod h1:A86O+tHqZLMNO4w6ZZ4FlVQEadcoqkyU72HC5wJ4RlU= +github.com/mmcloughlin/profile v0.1.1/go.mod h1:IhHD7q1ooxgwTgjxQYkACGA77oFTDdFVejUS1/tS/qU= github.com/moby/sys/mount v0.2.0 h1:WhCW5B355jtxndN5ovugJlMFJawbUODuW8fSnEH6SSM= github.com/moby/sys/mount v0.2.0/go.mod h1:aAivFE2LB3W4bACsUXChRHQ0qKWsetY4Y9V7sxOougM= github.com/moby/sys/mountinfo v0.4.0 h1:1KInV3Huv18akCu58V7lzNlt+jFmqlu1EaErnEHE/VM= @@ -312,6 +316,7 @@ github.com/stretchr/objx v0.3.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoH github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1-0.20210116013205-6990a05d54c2 h1:oevpAKCW58ZYJe1hqfgLqg+1zXmYrQ9xf7HLUdfS+qM= github.com/stretchr/testify v1.7.1-0.20210116013205-6990a05d54c2/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/sykesm/zap-logfmt v0.0.2 h1:czSzn+PIXCOAP/4NAIHTTziIKB8201PzoDkKTn+VR/8= @@ -407,8 +412,8 @@ golang.org/x/sys v0.0.0-20200831180312-196b9ba8737a/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200922070232-aee5d888a860/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210326220804-49726bf1d181 
h1:64ChN/hjER/taL4YJuA+gpLfIMT+/NFherRZixbxOhg= -golang.org/x/sys v0.0.0-20210326220804-49726bf1d181/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210420205809-ac73e9fd8988 h1:EjgCl+fVlIaPJSori0ikSz3uV0DOHKWOJFpv1sAAhBM= +golang.org/x/sys v0.0.0-20210420205809-ac73e9fd8988/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/term v0.0.0-20201113234701-d7a72108b828/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -486,3 +491,4 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +rsc.io/tmplfunc v0.0.3/go.mod h1:AG3sTPzElb1Io3Yg4voV9AGZJuleGAwaVRxL9M49PhA= diff --git a/scripts/golinter.sh b/scripts/golinter.sh index 6177926ae3d..438e3a3c764 100755 --- a/scripts/golinter.sh +++ b/scripts/golinter.sh @@ -69,8 +69,9 @@ if [ -n "$OUTPUT" ]; then exit 1 fi +# staticcheck Fabric source files - ignore issues in vendored dependency projects echo "Checking with staticcheck" -OUTPUT="$(staticcheck ./... || true)" +OUTPUT="$(staticcheck ./... 
| grep -v vendor/ || true)" if [ -n "$OUTPUT" ]; then echo "The following staticcheck issues were flagged" echo "$OUTPUT" diff --git a/vagrant/golang.sh b/vagrant/golang.sh index 218e77c5fda..1423133f0f6 100644 --- a/vagrant/golang.sh +++ b/vagrant/golang.sh @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 GOROOT='/opt/go' -GO_VERSION=1.16.7 +GO_VERSION=1.17.5 # ---------------------------------------------------------------- # Install Golang diff --git a/vendor/github.com/IBM/idemix/go.mod b/vendor/github.com/IBM/idemix/go.mod index 20a127f3db1..f90834977c3 100644 --- a/vendor/github.com/IBM/idemix/go.mod +++ b/vendor/github.com/IBM/idemix/go.mod @@ -3,7 +3,7 @@ module github.com/IBM/idemix go 1.16 require ( - github.com/IBM/mathlib v0.0.0-20210928081244-f5486459a290 + github.com/IBM/mathlib v0.0.0-20220112091634-0a7378db6912 github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect github.com/alecthomas/units v0.0.0-20210912230133-d1bdfacee922 // indirect github.com/golang/protobuf v1.3.3 diff --git a/vendor/github.com/IBM/idemix/go.sum b/vendor/github.com/IBM/idemix/go.sum index 76f71af7470..89a8a153f7b 100644 --- a/vendor/github.com/IBM/idemix/go.sum +++ b/vendor/github.com/IBM/idemix/go.sum @@ -1,8 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/IBM/mathlib v0.0.0-20210928081244-f5486459a290 h1:usgCPts8YnOT6ba6CQLPzQ5Yb1crnQ8iU132Zm679IM= -github.com/IBM/mathlib v0.0.0-20210928081244-f5486459a290/go.mod h1:grSmaMdY3LbW9QwqMrzuTUCHjES4rzT4Dm7q6yIL9vs= +github.com/IBM/mathlib v0.0.0-20220112091634-0a7378db6912 h1:rySf+WTiafw7zS7P8GUcZGDd2nDTgLDUx51aIcCFuX4= +github.com/IBM/mathlib v0.0.0-20220112091634-0a7378db6912/go.mod h1:WZGhleRZVSAg25iKkiWXHacTkui2CY1cyJMBOgpQwh8= github.com/alecthomas/template 
v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20210912230133-d1bdfacee922 h1:8ypNbf5sd3Sm3cKJ9waOGoQv6dKAFiFty9L6NP1AqJ4= @@ -10,9 +10,9 @@ github.com/alecthomas/units v0.0.0-20210912230133-d1bdfacee922/go.mod h1:OMCwj8V github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/consensys/bavard v0.1.8-0.20210329205436-c3e862ba4e5f/go.mod h1:Bpd0/3mZuaj6Sj+PqrmIquiOKy397AKGThQPaGzNXAQ= -github.com/consensys/gnark-crypto v0.4.0 h1:KHf7Ta876Ys6L8+i0DLRRKOAa3PfJ8oobAX1CEeIa4A= -github.com/consensys/gnark-crypto v0.4.0/go.mod h1:wK/gpXP9B06qTzTVML71GhKD1ygP9xOzukbI68NJqsQ= +github.com/consensys/bavard v0.1.8-0.20210915155054-088da2f7f54a/go.mod h1:9ItSMtA/dXMAiL7BG6bqW2m3NdSEObYWoH223nGHukI= +github.com/consensys/gnark-crypto v0.6.0 h1:K48rcIJaX2YkQT2k51EiHIxTynpHsOLHF1FVV+0aS7w= +github.com/consensys/gnark-crypto v0.6.0/go.mod h1:PicAZJP763+7N9LZFfj+MquTXq98pwjD6l8Ry8WdHSU= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -31,6 +31,7 @@ github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaW github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/subcommands 
v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/hyperledger/fabric-amcl v0.0.0-20210603140002-2670f91851c8 h1:BCR8ZlOZ+deUbWxyY6fpoY8LbB7PR5wGGwCTvWQOU2g= github.com/hyperledger/fabric-amcl v0.0.0-20210603140002-2670f91851c8/go.mod h1:X+DIyUsaTmalOpmpQfIvFZjKHQedrURQ5t4YqquX7lE= @@ -44,6 +45,9 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c= github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8= +github.com/mmcloughlin/addchain v0.4.0 h1:SobOdjm2xLj1KkXN5/n0xTIWyZA2+s99UCY1iPfkHRY= +github.com/mmcloughlin/addchain v0.4.0/go.mod h1:A86O+tHqZLMNO4w6ZZ4FlVQEadcoqkyU72HC5wJ4RlU= +github.com/mmcloughlin/profile v0.1.1/go.mod h1:IhHD7q1ooxgwTgjxQYkACGA77oFTDdFVejUS1/tS/qU= github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= @@ -61,6 +65,7 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1-0.20210116013205-6990a05d54c2 h1:oevpAKCW58ZYJe1hqfgLqg+1zXmYrQ9xf7HLUdfS+qM= github.com/stretchr/testify v1.7.1-0.20210116013205-6990a05d54c2/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/sykesm/zap-logfmt v0.0.2 
h1:czSzn+PIXCOAP/4NAIHTTziIKB8201PzoDkKTn+VR/8= @@ -78,6 +83,7 @@ go.uber.org/zap v1.16.0 h1:uFRZXykJGK9lLY4HtgSw44DnIcAM+kRBP7x5m+NpAOM= go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2 h1:It14KIkyBFYkHkwZ7k45minvA9aorojkyjGk9KJ5B/w= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -107,8 +113,8 @@ golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210326220804-49726bf1d181 h1:64ChN/hjER/taL4YJuA+gpLfIMT+/NFherRZixbxOhg= -golang.org/x/sys v0.0.0-20210326220804-49726bf1d181/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210420205809-ac73e9fd8988 h1:EjgCl+fVlIaPJSori0ikSz3uV0DOHKWOJFpv1sAAhBM= +golang.org/x/sys v0.0.0-20210420205809-ac73e9fd8988/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= @@ -153,3 +159,4 @@ 
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +rsc.io/tmplfunc v0.0.3/go.mod h1:AG3sTPzElb1Io3Yg4voV9AGZJuleGAwaVRxL9M49PhA= diff --git a/vendor/github.com/IBM/mathlib/go.mod b/vendor/github.com/IBM/mathlib/go.mod index 5361b7d030c..94deb01ad2f 100644 --- a/vendor/github.com/IBM/mathlib/go.mod +++ b/vendor/github.com/IBM/mathlib/go.mod @@ -3,8 +3,8 @@ module github.com/IBM/mathlib go 1.16 require ( - github.com/consensys/gnark-crypto v0.4.0 + github.com/consensys/gnark-crypto v0.6.0 github.com/hyperledger/fabric-amcl v0.0.0-20210603140002-2670f91851c8 github.com/pkg/errors v0.8.1 - github.com/stretchr/testify v1.4.0 + github.com/stretchr/testify v1.7.0 ) diff --git a/vendor/github.com/IBM/mathlib/go.sum b/vendor/github.com/IBM/mathlib/go.sum index c444a6712f3..b75f9392eaf 100644 --- a/vendor/github.com/IBM/mathlib/go.sum +++ b/vendor/github.com/IBM/mathlib/go.sum @@ -1,28 +1,34 @@ -github.com/consensys/bavard v0.1.8-0.20210329205436-c3e862ba4e5f/go.mod h1:Bpd0/3mZuaj6Sj+PqrmIquiOKy397AKGThQPaGzNXAQ= -github.com/consensys/gnark-crypto v0.4.0 h1:KHf7Ta876Ys6L8+i0DLRRKOAa3PfJ8oobAX1CEeIa4A= -github.com/consensys/gnark-crypto v0.4.0/go.mod h1:wK/gpXP9B06qTzTVML71GhKD1ygP9xOzukbI68NJqsQ= +github.com/consensys/bavard v0.1.8-0.20210915155054-088da2f7f54a/go.mod h1:9ItSMtA/dXMAiL7BG6bqW2m3NdSEObYWoH223nGHukI= +github.com/consensys/gnark-crypto v0.6.0 h1:K48rcIJaX2YkQT2k51EiHIxTynpHsOLHF1FVV+0aS7w= +github.com/consensys/gnark-crypto v0.6.0/go.mod h1:PicAZJP763+7N9LZFfj+MquTXq98pwjD6l8Ry8WdHSU= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 
+github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= github.com/hyperledger/fabric-amcl v0.0.0-20210603140002-2670f91851c8 h1:BCR8ZlOZ+deUbWxyY6fpoY8LbB7PR5wGGwCTvWQOU2g= github.com/hyperledger/fabric-amcl v0.0.0-20210603140002-2670f91851c8/go.mod h1:X+DIyUsaTmalOpmpQfIvFZjKHQedrURQ5t4YqquX7lE= github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c= github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8= +github.com/mmcloughlin/addchain v0.4.0 h1:SobOdjm2xLj1KkXN5/n0xTIWyZA2+s99UCY1iPfkHRY= +github.com/mmcloughlin/addchain v0.4.0/go.mod h1:A86O+tHqZLMNO4w6ZZ4FlVQEadcoqkyU72HC5wJ4RlU= +github.com/mmcloughlin/profile v0.1.1/go.mod h1:IhHD7q1ooxgwTgjxQYkACGA77oFTDdFVejUS1/tS/qU= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2 h1:It14KIkyBFYkHkwZ7k45minvA9aorojkyjGk9KJ5B/w= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
-golang.org/x/sys v0.0.0-20210326220804-49726bf1d181 h1:64ChN/hjER/taL4YJuA+gpLfIMT+/NFherRZixbxOhg= -golang.org/x/sys v0.0.0-20210326220804-49726bf1d181/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210420205809-ac73e9fd8988 h1:EjgCl+fVlIaPJSori0ikSz3uV0DOHKWOJFpv1sAAhBM= +golang.org/x/sys v0.0.0-20210420205809-ac73e9fd8988/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +rsc.io/tmplfunc v0.0.3/go.mod h1:AG3sTPzElb1Io3Yg4voV9AGZJuleGAwaVRxL9M49PhA= diff --git a/vendor/github.com/cespare/xxhash/v2/.travis.yml b/vendor/github.com/cespare/xxhash/v2/.travis.yml deleted file mode 100644 index c516ea88da7..00000000000 --- a/vendor/github.com/cespare/xxhash/v2/.travis.yml +++ /dev/null @@ -1,8 +0,0 @@ -language: go -go: - - "1.x" - - master -env: - - TAGS="" - - TAGS="-tags purego" -script: go test $TAGS -v ./... 
diff --git a/vendor/github.com/cespare/xxhash/v2/README.md b/vendor/github.com/cespare/xxhash/v2/README.md index 2fd8693c21b..792b4a60b34 100644 --- a/vendor/github.com/cespare/xxhash/v2/README.md +++ b/vendor/github.com/cespare/xxhash/v2/README.md @@ -1,7 +1,7 @@ # xxhash -[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash) -[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash) +[![Go Reference](https://pkg.go.dev/badge/github.com/cespare/xxhash/v2.svg)](https://pkg.go.dev/github.com/cespare/xxhash/v2) +[![Test](https://github.com/cespare/xxhash/actions/workflows/test.yml/badge.svg)](https://github.com/cespare/xxhash/actions/workflows/test.yml) xxhash is a Go implementation of the 64-bit [xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a @@ -64,4 +64,6 @@ $ go test -benchtime 10s -bench '/xxhash,direct,bytes' - [InfluxDB](https://github.com/influxdata/influxdb) - [Prometheus](https://github.com/prometheus/prometheus) +- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) - [FreeCache](https://github.com/coocood/freecache) +- [FastCache](https://github.com/VictoriaMetrics/fastcache) diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash.go b/vendor/github.com/cespare/xxhash/v2/xxhash.go index db0b35fbe39..15c835d5417 100644 --- a/vendor/github.com/cespare/xxhash/v2/xxhash.go +++ b/vendor/github.com/cespare/xxhash/v2/xxhash.go @@ -193,7 +193,6 @@ func (d *Digest) UnmarshalBinary(b []byte) error { b, d.v4 = consumeUint64(b) b, d.total = consumeUint64(b) copy(d.mem[:], b) - b = b[len(d.mem):] d.n = int(d.total % uint64(len(d.mem))) return nil } diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s index d580e32aed4..be8db5bf796 100644 --- a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s @@ 
-6,7 +6,7 @@ // Register allocation: // AX h -// CX pointer to advance through b +// SI pointer to advance through b // DX n // BX loop end // R8 v1, k1 @@ -16,39 +16,39 @@ // R12 tmp // R13 prime1v // R14 prime2v -// R15 prime4v +// DI prime4v -// round reads from and advances the buffer pointer in CX. +// round reads from and advances the buffer pointer in SI. // It assumes that R13 has prime1v and R14 has prime2v. #define round(r) \ - MOVQ (CX), R12 \ - ADDQ $8, CX \ + MOVQ (SI), R12 \ + ADDQ $8, SI \ IMULQ R14, R12 \ ADDQ R12, r \ ROLQ $31, r \ IMULQ R13, r // mergeRound applies a merge round on the two registers acc and val. -// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v. +// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v. #define mergeRound(acc, val) \ IMULQ R14, val \ ROLQ $31, val \ IMULQ R13, val \ XORQ val, acc \ IMULQ R13, acc \ - ADDQ R15, acc + ADDQ DI, acc // func Sum64(b []byte) uint64 TEXT ·Sum64(SB), NOSPLIT, $0-32 // Load fixed primes. MOVQ ·prime1v(SB), R13 MOVQ ·prime2v(SB), R14 - MOVQ ·prime4v(SB), R15 + MOVQ ·prime4v(SB), DI // Load slice. - MOVQ b_base+0(FP), CX + MOVQ b_base+0(FP), SI MOVQ b_len+8(FP), DX - LEAQ (CX)(DX*1), BX + LEAQ (SI)(DX*1), BX // The first loop limit will be len(b)-32. SUBQ $32, BX @@ -65,14 +65,14 @@ TEXT ·Sum64(SB), NOSPLIT, $0-32 XORQ R11, R11 SUBQ R13, R11 - // Loop until CX > BX. + // Loop until SI > BX. blockLoop: round(R8) round(R9) round(R10) round(R11) - CMPQ CX, BX + CMPQ SI, BX JLE blockLoop MOVQ R8, AX @@ -100,16 +100,16 @@ noBlocks: afterBlocks: ADDQ DX, AX - // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8. + // Right now BX has len(b)-32, and we want to loop until SI > len(b)-8. ADDQ $24, BX - CMPQ CX, BX + CMPQ SI, BX JG fourByte wordLoop: // Calculate k1. 
- MOVQ (CX), R8 - ADDQ $8, CX + MOVQ (SI), R8 + ADDQ $8, SI IMULQ R14, R8 ROLQ $31, R8 IMULQ R13, R8 @@ -117,18 +117,18 @@ wordLoop: XORQ R8, AX ROLQ $27, AX IMULQ R13, AX - ADDQ R15, AX + ADDQ DI, AX - CMPQ CX, BX + CMPQ SI, BX JLE wordLoop fourByte: ADDQ $4, BX - CMPQ CX, BX + CMPQ SI, BX JG singles - MOVL (CX), R8 - ADDQ $4, CX + MOVL (SI), R8 + ADDQ $4, SI IMULQ R13, R8 XORQ R8, AX @@ -138,19 +138,19 @@ fourByte: singles: ADDQ $4, BX - CMPQ CX, BX + CMPQ SI, BX JGE finalize singlesLoop: - MOVBQZX (CX), R12 - ADDQ $1, CX + MOVBQZX (SI), R12 + ADDQ $1, SI IMULQ ·prime5v(SB), R12 XORQ R12, AX ROLQ $11, AX IMULQ R13, AX - CMPQ CX, BX + CMPQ SI, BX JL singlesLoop finalize: @@ -179,9 +179,9 @@ TEXT ·writeBlocks(SB), NOSPLIT, $0-40 MOVQ ·prime2v(SB), R14 // Load slice. - MOVQ b_base+8(FP), CX + MOVQ b_base+8(FP), SI MOVQ b_len+16(FP), DX - LEAQ (CX)(DX*1), BX + LEAQ (SI)(DX*1), BX SUBQ $32, BX // Load vN from d. @@ -199,7 +199,7 @@ blockLoop: round(R10) round(R11) - CMPQ CX, BX + CMPQ SI, BX JLE blockLoop // Copy vN back to d. @@ -208,8 +208,8 @@ blockLoop: MOVQ R10, 16(AX) MOVQ R11, 24(AX) - // The number of bytes written is CX minus the old base pointer. - SUBQ b_base+8(FP), CX - MOVQ CX, ret+32(FP) + // The number of bytes written is SI minus the old base pointer. + SUBQ b_base+8(FP), SI + MOVQ SI, ret+32(FP) RET diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go index 53bf76efbc2..376e0ca2e49 100644 --- a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go @@ -6,41 +6,52 @@ package xxhash import ( - "reflect" "unsafe" ) -// Notes: +// In the future it's possible that compiler optimizations will make these +// XxxString functions unnecessary by realizing that calls such as +// Sum64([]byte(s)) don't need to copy s. See https://golang.org/issue/2205. 
+// If that happens, even if we keep these functions they can be replaced with +// the trivial safe code. + +// NOTE: The usual way of doing an unsafe string-to-[]byte conversion is: // -// See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ -// for some discussion about these unsafe conversions. +// var b []byte +// bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) +// bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data +// bh.Len = len(s) +// bh.Cap = len(s) // -// In the future it's possible that compiler optimizations will make these -// unsafe operations unnecessary: https://golang.org/issue/2205. +// Unfortunately, as of Go 1.15.3 the inliner's cost model assigns a high enough +// weight to this sequence of expressions that any function that uses it will +// not be inlined. Instead, the functions below use a different unsafe +// conversion designed to minimize the inliner weight and allow both to be +// inlined. There is also a test (TestInlining) which verifies that these are +// inlined. // -// Both of these wrapper functions still incur function call overhead since they -// will not be inlined. We could write Go/asm copies of Sum64 and Digest.Write -// for strings to squeeze out a bit more speed. Mid-stack inlining should -// eventually fix this. +// See https://github.com/golang/go/issues/42739 for discussion. // Sum64String computes the 64-bit xxHash digest of s. // It may be faster than Sum64([]byte(s)) by avoiding a copy. func Sum64String(s string) uint64 { - var b []byte - bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data - bh.Len = len(s) - bh.Cap = len(s) + b := *(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)})) return Sum64(b) } // WriteString adds more data to d. It always returns len(s), nil. // It may be faster than Write([]byte(s)) by avoiding a copy. 
func (d *Digest) WriteString(s string) (n int, err error) { - var b []byte - bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data - bh.Len = len(s) - bh.Cap = len(s) - return d.Write(b) + d.Write(*(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)}))) + // d.Write always returns len(s), nil. + // Ignoring the return output and returning these fixed values buys a + // savings of 6 in the inliner's cost model. + return len(s), nil +} + +// sliceHeader is similar to reflect.SliceHeader, but it assumes that the layout +// of the first two words is the same as the layout of a string. +type sliceHeader struct { + s string + cap int } diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/bn254.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/bn254.go index e27e9177a4b..c476c70ab0d 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/bn254.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/bn254.go @@ -30,9 +30,6 @@ var twist fptower.E2 // bTwistCurveCoeff b coeff of the twist (defined over Fp2) curve var bTwistCurveCoeff fptower.E2 -// twoInv 1/2 mod p (needed for DoubleStep in Miller loop) -var twoInv fp.Element - // generators of the r-torsion group, resp. in ker(pi-id), ker(Tr) var g1Gen G1Jac var g2Gen G2Jac @@ -69,6 +66,9 @@ var endo struct { // generator of the curve var xGen big.Int +// fixefCoeff t-1 = 6*xGen^2 +var fixedCoeff big.Int + func init() { bCurveCoeff.SetUint64(3) @@ -76,8 +76,6 @@ func init() { twist.A1.SetUint64(1) bTwistCurveCoeff.Inverse(&twist).MulByElement(&bTwistCurveCoeff, &bCurveCoeff) - twoInv.SetOne().Double(&twoInv).Inverse(&twoInv) - g1Gen.X.SetString("1") g1Gen.Y.SetString("2") g1Gen.Z.SetString("1") @@ -114,6 +112,8 @@ func init() { xGen.SetString("4965661367192848881", 10) + fixedCoeff.SetString("147946756881789318990833708069417712966", 10) + } // Generators return the generators of the r-torsion group, resp. 
in ker(pi-id), ker(Tr) diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/doc.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/doc.go new file mode 100644 index 00000000000..df3457ba2ae --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package bn254 efficient elliptic curve and pairing implementation for bn254. +package bn254 diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/asm.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/asm.go index 715bc7ac121..7344271ebee 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/asm.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/asm.go @@ -1,3 +1,4 @@ +//go:build !noadx // +build !noadx // Copyright 2020 ConsenSys Software Inc. diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/asm_noadx.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/asm_noadx.go index 371bfeaeb3a..ae778bd3a1f 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/asm_noadx.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/asm_noadx.go @@ -1,3 +1,4 @@ +//go:build noadx // +build noadx // Copyright 2020 ConsenSys Software Inc. 
diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/doc.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/doc.go new file mode 100644 index 00000000000..7961440127d --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fp contains field arithmetic operations for modulus = 0x30644e...7cfd47. +// +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@gnark/modular_multiplication) +// +// The modulus is hardcoded in all the operations. 
+// +// Field elements are represented as an array, and assumed to be in Montgomery form in all methods: +// type Element [4]uint64 +// +// Example API signature +// // Mul z = x * y mod q +// func (z *Element) Mul(x, y *Element) *Element +// +// and can be used like so: +// var a, b Element +// a.SetUint64(2) +// b.SetString("984896738") +// a.Mul(a, b) +// a.Sub(a, a) +// .Add(a, b) +// .Inv(a) +// b.Exp(b, new(big.Int).SetUint64(42)) +// +// Modulus +// 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 // base 16 +// 21888242871839275222246405745257275088696311157297823662689037894645226208583 // base 10 +package fp diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element.go index 8123dbe14c6..0c5e5a20c88 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element.go @@ -14,7 +14,6 @@ // Code generated by consensys/gnark-crypto DO NOT EDIT -// Package fp contains field arithmetic operations for modulus 21888242871839275222246405745257275088696311157297823662689037894645226208583 package fp // /!\ WARNING /!\ @@ -26,10 +25,13 @@ package fp import ( "crypto/rand" "encoding/binary" + "errors" "io" "math/big" "math/bits" + "reflect" "strconv" + "strings" "sync" ) @@ -61,13 +63,21 @@ func Modulus() *big.Int { } // q (modulus) +const qElementWord0 uint64 = 4332616871279656263 +const qElementWord1 uint64 = 10917124144477883021 +const qElementWord2 uint64 = 13281191951274694749 +const qElementWord3 uint64 = 3486998266802970665 + var qElement = Element{ - 4332616871279656263, - 10917124144477883021, - 13281191951274694749, - 3486998266802970665, + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, } +// Used for Montgomery reduction. 
(qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r +const qInvNegLsw uint64 = 9786893198990664585 + // rSquare var rSquare = Element{ 17522657719365597833, @@ -86,12 +96,39 @@ func init() { _modulus.SetString("21888242871839275222246405745257275088696311157297823662689037894645226208583", 10) } -// SetUint64 z = v, sets z LSB to v (non-Montgomery form) and convert z to Montgomery form +// NewElement returns a new Element from a uint64 value +// +// it is equivalent to +// var v NewElement +// v.SetUint64(...) +func NewElement(v uint64) Element { + z := Element{v} + z.Mul(&z, &rSquare) + return z +} + +// SetUint64 sets z to v and returns z func (z *Element) SetUint64(v uint64) *Element { + // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form *z = Element{v} return z.Mul(z, &rSquare) // z.ToMont() } +// SetInt64 sets z to v and returns z +func (z *Element) SetInt64(v int64) *Element { + + // absolute value of v + m := v >> 63 + z.SetUint64(uint64((v ^ m) - m)) + + if m != 0 { + // v is negative + z.Neg(z) + } + + return z +} + // Set z = x func (z *Element) Set(x *Element) *Element { z[0] = x[0] @@ -101,28 +138,46 @@ func (z *Element) Set(x *Element) *Element { return z } -// SetInterface converts i1 from uint64, int, string, or Element, big.Int into Element -// panic if provided type is not supported -func (z *Element) SetInterface(i1 interface{}) *Element { +// SetInterface converts provided interface into Element +// returns an error if provided type is not supported +// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer), +// *big.Int, big.Int, []byte +func (z *Element) SetInterface(i1 interface{}) (*Element, error) { switch c1 := i1.(type) { case Element: - return z.Set(&c1) + return z.Set(&c1), nil case *Element: - return z.Set(c1) + return z.Set(c1), nil + case uint8: + return z.SetUint64(uint64(c1)), nil + case uint16: + return z.SetUint64(uint64(c1)), nil + case uint32: + return 
z.SetUint64(uint64(c1)), nil + case uint: + return z.SetUint64(uint64(c1)), nil case uint64: - return z.SetUint64(c1) + return z.SetUint64(c1), nil + case int8: + return z.SetInt64(int64(c1)), nil + case int16: + return z.SetInt64(int64(c1)), nil + case int32: + return z.SetInt64(int64(c1)), nil + case int64: + return z.SetInt64(c1), nil case int: - return z.SetString(strconv.Itoa(c1)) + return z.SetInt64(int64(c1)), nil case string: - return z.SetString(c1) + return z.SetString(c1), nil case *big.Int: - return z.SetBigInt(c1) + return z.SetBigInt(c1), nil case big.Int: - return z.SetBigInt(&c1) + return z.SetBigInt(&c1), nil case []byte: - return z.SetBytes(c1) + return z.SetBytes(c1), nil default: - panic("invalid type") + return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String()) } } @@ -152,6 +207,16 @@ func (z *Element) Div(x, y *Element) *Element { return z } +// Bit returns the i'th bit, with lsb == bit 0. +// It is the responsability of the caller to convert from Montgomery to Regular form if needed +func (z *Element) Bit(i uint64) uint64 { + j := i / 64 + if j >= 4 { + return 0 + } + return uint64(z[j] >> (i % 64) & 1) +} + // Equal returns z == x func (z *Element) Equal(x *Element) bool { return (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0]) @@ -162,6 +227,11 @@ func (z *Element) IsZero() bool { return (z[3] | z[2] | z[1] | z[0]) == 0 } +// IsUint64 reports whether z can be represented as an uint64. 
+func (z *Element) IsUint64() bool { + return (z[3] | z[2] | z[1]) == 0 +} + // Cmp compares (lexicographic order) z and x and returns: // // -1 if z < x @@ -227,7 +297,7 @@ func (z *Element) SetRandom() (*Element, error) { z[3] = binary.BigEndian.Uint64(bytes[24:32]) z[3] %= 3486998266802970665 - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 10917124144477883021 || (z[1] == 10917124144477883021 && (z[0] < 4332616871279656263))))))) { var b uint64 @@ -247,35 +317,39 @@ func One() Element { return one } -// MulAssign is deprecated -// Deprecated: use Mul instead -func (z *Element) MulAssign(x *Element) *Element { - return z.Mul(z, x) -} +// Halve sets z to z / 2 (mod p) +func (z *Element) Halve() { + if z[0]&1 == 1 { + var carry uint64 -// AddAssign is deprecated -// Deprecated: use Add instead -func (z *Element) AddAssign(x *Element) *Element { - return z.Add(z, x) -} + // z = z + q + z[0], carry = bits.Add64(z[0], 4332616871279656263, 0) + z[1], carry = bits.Add64(z[1], 10917124144477883021, carry) + z[2], carry = bits.Add64(z[2], 13281191951274694749, carry) + z[3], _ = bits.Add64(z[3], 3486998266802970665, carry) + + } + + // z = z >> 1 + + z[0] = z[0]>>1 | z[1]<<63 + z[1] = z[1]>>1 | z[2]<<63 + z[2] = z[2]>>1 | z[3]<<63 + z[3] >>= 1 -// SubAssign is deprecated -// Deprecated: use Sub instead -func (z *Element) SubAssign(x *Element) *Element { - return z.Sub(z, x) } // API with assembly impl // Mul z = x * y mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Mul(x, y *Element) *Element { mul(z, x, y) return z } // Square z = x * x mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Square(x *Element) *Element { mul(z, x, 
x) return z @@ -371,7 +445,58 @@ func _mulGeneric(z, x, y *Element) { z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) } - // if z > q --> z -= q + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 10917124144477883021 || (z[1] == 10917124144477883021 && (z[0] < 4332616871279656263))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 4332616871279656263, 0) + z[1], b = bits.Sub64(z[1], 10917124144477883021, b) + z[2], b = bits.Sub64(z[2], 13281191951274694749, b) + z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) + } +} + +func _mulWGeneric(z, x *Element, y uint64) { + + var t [4]uint64 + { + // round 0 + c1, c0 := bits.Mul64(y, x[0]) + m := c0 * 9786893198990664585 + c2 := madd0(m, 4332616871279656263, c0) + c1, c0 = madd1(y, x[1], c1) + c2, t[0] = madd2(m, 10917124144477883021, c2, c0) + c1, c0 = madd1(y, x[2], c1) + c2, t[1] = madd2(m, 13281191951274694749, c2, c0) + c1, c0 = madd1(y, x[3], c1) + t[3], t[2] = madd3(m, 3486998266802970665, c0, c2, c1) + } + { + // round 1 + m := t[0] * 9786893198990664585 + c2 := madd0(m, 4332616871279656263, t[0]) + c2, t[0] = madd2(m, 10917124144477883021, c2, t[1]) + c2, t[1] = madd2(m, 13281191951274694749, c2, t[2]) + t[3], t[2] = madd2(m, 3486998266802970665, t[3], c2) + } + { + // round 2 + m := t[0] * 9786893198990664585 + c2 := madd0(m, 4332616871279656263, t[0]) + c2, t[0] = madd2(m, 10917124144477883021, c2, t[1]) + c2, t[1] = madd2(m, 13281191951274694749, c2, t[2]) + t[3], t[2] = madd2(m, 3486998266802970665, t[3], c2) + } + { + // round 3 + m := t[0] * 9786893198990664585 + c2 := madd0(m, 4332616871279656263, t[0]) + c2, z[0] = madd2(m, 10917124144477883021, c2, t[1]) + c2, z[1] = madd2(m, 13281191951274694749, c2, t[2]) + z[3], z[2] = madd2(m, 3486998266802970665, t[3], c2) + } + + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 
3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 10917124144477883021 || (z[1] == 10917124144477883021 && (z[0] < 4332616871279656263))))))) { var b uint64 @@ -422,7 +547,7 @@ func _fromMontGeneric(z *Element) { z[3] = C } - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 10917124144477883021 || (z[1] == 10917124144477883021 && (z[0] < 4332616871279656263))))))) { var b uint64 @@ -441,7 +566,7 @@ func _addGeneric(z, x, y *Element) { z[2], carry = bits.Add64(x[2], y[2], carry) z[3], _ = bits.Add64(x[3], y[3], carry) - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 10917124144477883021 || (z[1] == 10917124144477883021 && (z[0] < 4332616871279656263))))))) { var b uint64 @@ -460,7 +585,7 @@ func _doubleGeneric(z, x *Element) { z[2], carry = bits.Add64(x[2], x[2], carry) z[3], _ = bits.Add64(x[3], x[3], carry) - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 10917124144477883021 || (z[1] == 10917124144477883021 && (z[0] < 4332616871279656263))))))) { var b uint64 @@ -500,7 +625,7 @@ func _negGeneric(z, x *Element) { func _reduceGeneric(z *Element) { - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 10917124144477883021 || (z[1] == 10917124144477883021 && (z[0] < 4332616871279656263))))))) { var b uint64 @@ 
-528,8 +653,64 @@ func mulByConstant(z *Element, c uint8) { _z := *z z.Double(z).Double(z).Add(z, &_z) default: - panic("not implemented") + var y Element + y.SetUint64(uint64(c)) + z.Mul(z, &y) + } +} + +// BatchInvert returns a new slice with every element inverted. +// Uses Montgomery batch inversion trick +func BatchInvert(a []Element) []Element { + res := make([]Element, len(a)) + if len(a) == 0 { + return res } + + zeroes := make([]bool, len(a)) + accumulator := One() + + for i := 0; i < len(a); i++ { + if a[i].IsZero() { + zeroes[i] = true + continue + } + res[i] = accumulator + accumulator.Mul(&accumulator, &a[i]) + } + + accumulator.Inverse(&accumulator) + + for i := len(a) - 1; i >= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} + +func _butterflyGeneric(a, b *Element) { + t := *a + a.Add(a, b) + b.Sub(&t, b) +} + +// BitLen returns the minimum number of bits needed to represent z +// returns 0 if z == 0 +func (z *Element) BitLen() int { + if z[3] != 0 { + return 192 + bits.Len64(z[3]) + } + if z[2] != 0 { + return 128 + bits.Len64(z[2]) + } + if z[1] != 0 { + return 64 + bits.Len64(z[1]) + } + return bits.Len64(z[0]) } // Exp z = x^exponent mod q @@ -552,7 +733,7 @@ func (z *Element) Exp(x Element, exponent *big.Int) *Element { } // ToMont converts z to Montgomery form -// sets and returns z = z * r^2 +// sets and returns z = z * r² func (z *Element) ToMont() *Element { return z.Mul(z, &rSquare) } @@ -562,11 +743,41 @@ func (z Element) ToRegular() Element { return *z.FromMont() } -// String returns the string form of an Element in Montgomery form +// String returns the decimal representation of z as generated by +// z.Text(10). func (z *Element) String() string { + return z.Text(10) +} + +// Text returns the string representation of z in the given base. +// Base must be between 2 and 36, inclusive. 
The result uses the +// lower-case letters 'a' to 'z' for digit values 10 to 35. +// No prefix (such as "0x") is added to the string. If z is a nil +// pointer it returns "". +// If base == 10 and -z fits in a uint64 prefix "-" is added to the string. +func (z *Element) Text(base int) string { + if base < 2 || base > 36 { + panic("invalid base") + } + if z == nil { + return "" + } + zz := *z + zz.FromMont() + if zz.IsUint64() { + return strconv.FormatUint(zz[0], base) + } else if base == 10 { + var zzNeg Element + zzNeg.Neg(z) + zzNeg.FromMont() + if zzNeg.IsUint64() { + return "-" + strconv.FormatUint(zzNeg[0], base) + } + } vv := bigIntPool.Get().(*big.Int) - defer bigIntPool.Put(vv) - return z.ToBigIntRegular(vv).String() + r := zz.ToBigInt(vv).Text(base) + bigIntPool.Put(vv) + return r } // ToBigInt returns z as a big.Int in Montgomery form @@ -598,6 +809,13 @@ func (z *Element) Bytes() (res [Limbs * 8]byte) { return } +// Marshal returns the regular (non montgomery) value +// of z as a big-endian byte slice. +func (z *Element) Marshal() []byte { + b := z.Bytes() + return b[:] +} + // SetBytes interprets e as the bytes of a big-endian unsigned integer, // sets z to that value (in Montgomery form), and returns z. func (z *Element) SetBytes(e []byte) *Element { @@ -645,7 +863,7 @@ func (z *Element) SetBigInt(v *big.Int) *Element { return z } -// setBigInt assumes 0 <= v < q +// setBigInt assumes 0 ⩽ v < q func (z *Element) setBigInt(v *big.Int) *Element { vBits := v.Bits() @@ -666,14 +884,30 @@ func (z *Element) setBigInt(v *big.Int) *Element { return z.ToMont() } -// SetString creates a big.Int with s (in base 10) and calls SetBigInt on z -func (z *Element) SetString(s string) *Element { +// SetString creates a big.Int with number and calls SetBigInt on z +// +// The number prefix determines the actual base: A prefix of +// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8, +// and ''0x'' or ''0X'' selects base 16. 
Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For base 16, lower and upper case letters are considered the same: +// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. +// +// An underscore character ''_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as a panic if there +// are no other errors. +// +func (z *Element) SetString(number string) *Element { // get temporary big int from the pool vv := bigIntPool.Get().(*big.Int) - if _, ok := vv.SetString(s, 10); !ok { - panic("Element.SetString failed -> can't parse number in base10 into a big.Int") + if _, ok := vv.SetString(number, 0); !ok { + panic("Element.SetString failed -> can't parse number into a big.Int " + number) } + z.SetBigInt(vv) // release object into pool @@ -682,22 +916,59 @@ func (z *Element) SetString(s string) *Element { return z } -var ( - _bLegendreExponentElement *big.Int - _bSqrtExponentElement *big.Int -) +// MarshalJSON returns json encoding of z (z.Text(10)) +// If z == nil, returns null +func (z *Element) MarshalJSON() ([]byte, error) { + if z == nil { + return []byte("null"), nil + } + const maxSafeBound = 15 // we encode it as number if it's small + s := z.Text(10) + if len(s) <= maxSafeBound { + return []byte(s), nil + } + var sbb strings.Builder + sbb.WriteByte('"') + sbb.WriteString(s) + sbb.WriteByte('"') + return []byte(sbb.String()), nil +} -func init() { - _bLegendreExponentElement, _ = new(big.Int).SetString("183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea3", 16) - const sqrtExponentElement = "c19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b61f3f52" - _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) +// UnmarshalJSON accepts numbers and strings as input +// See Element.SetString for valid prefixes (0x, 0b, ...) 
+func (z *Element) UnmarshalJSON(data []byte) error { + s := string(data) + if len(s) > Bits*3 { + return errors.New("value too large (max = Element.Bits * 3)") + } + + // we accept numbers and strings, remove leading and trailing quotes if any + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(s, 0); !ok { + return errors.New("can't parse into a big.Int: " + s) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return nil } // Legendre returns the Legendre symbol of z (either +1, -1, or 0.) func (z *Element) Legendre() int { var l Element // z^((q-1)/2) - l.Exp(*z, _bLegendreExponentElement) + l.expByLegendreExp(*z) if l.IsZero() { return 0 @@ -717,7 +988,7 @@ func (z *Element) Sqrt(x *Element) *Element { // q ≡ 3 (mod 4) // using z ≡ ± x^((p+1)/4) (mod q) var y, square Element - y.Exp(*x, _bSqrtExponentElement) + y.expBySqrtExp(*x) // as we didn't compute the legendre symbol, ensure we found y such that y * y = x square.Square(&y) if square.Equal(x) { @@ -726,170 +997,418 @@ func (z *Element) Sqrt(x *Element) *Element { return nil } -// Inverse z = x^-1 mod q -// Algorithm 16 in "Efficient Software-Implementation of Finite Fields with Applications to Cryptography" -// if x == 0, sets and returns z = x -func (z *Element) Inverse(x *Element) *Element { - if x.IsZero() { - return z.Set(x) +func max(a int, b int) int { + if a > b { + return a } + return b +} - // initialize u = q - var u = Element{ - 4332616871279656263, - 10917124144477883021, - 13281191951274694749, - 3486998266802970665, +func min(a int, b int) int { + if a < b { + return a } + return b +} + +const updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) +const updateFactorIdentityMatrixRow0 = 1 +const updateFactorIdentityMatrixRow1 = 1 << 32 + +func updateFactorsDecompose(c int64) 
(int64, int64) { + c += updateFactorsConversionBias + const low32BitsFilter int64 = 0xFFFFFFFF + f := c&low32BitsFilter - 0x7FFFFFFF + g := c>>32&low32BitsFilter - 0x7FFFFFFF + return f, g +} + +const k = 32 // word size / 2 +const signBitSelector = uint64(1) << 63 +const approxLowBitsN = k - 1 +const approxHighBitsN = k + 1 +const inversionCorrectionFactorWord0 = 11111708840330028223 +const inversionCorrectionFactorWord1 = 3098618286181893933 +const inversionCorrectionFactorWord2 = 756602578711705709 +const inversionCorrectionFactorWord3 = 1041752015607019851 - // initialize s = r^2 - var s = Element{ - 17522657719365597833, - 13107472804851548667, - 5164255478447964150, - 493319470278259999, +const invIterationsN = 18 + +// Inverse z = x⁻¹ mod q +// Implements "Optimized Binary GCD for Modular Inversion" +// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf +func (z *Element) Inverse(x *Element) *Element { + if x.IsZero() { + z.SetZero() + return z } - // r = 0 - r := Element{} + a := *x + b := Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + } // b := q + + u := Element{1} - v := *x + // Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v] + // c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1) + var c0, c1 int64 - var carry, borrow, t, t2 uint64 - var bigger bool + // Saved update factors to reduce the number of field multiplications + var pf0, pf1, pg0, pg1 int64 - for { - for v[0]&1 == 0 { + var i uint - // v = v >> 1 - t2 = v[3] << 63 - v[3] >>= 1 - t = t2 - t2 = v[2] << 63 - v[2] = (v[2] >> 1) | t - t = t2 - t2 = v[1] << 63 - v[1] = (v[1] >> 1) | t - t = t2 - v[0] = (v[0] >> 1) | t + var v, s Element - if s[0]&1 == 1 { + // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations + // This also lets us get away with half as many updates to u,v + // To make this constant-time-ish, replace the condition with i < invIterationsN + for i = 0; i&1 == 1 || !a.IsZero(); i++ { + n := 
max(a.BitLen(), b.BitLen()) + aApprox, bApprox := approximate(&a, n), approximate(&b, n) - // s = s + q - s[0], carry = bits.Add64(s[0], 4332616871279656263, 0) - s[1], carry = bits.Add64(s[1], 10917124144477883021, carry) - s[2], carry = bits.Add64(s[2], 13281191951274694749, carry) - s[3], _ = bits.Add64(s[3], 3486998266802970665, carry) + // After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰ + // f0, g0, f1, g1 = 1, 0, 0, 1 + c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 + for j := 0; j < approxLowBitsN; j++ { + + if aApprox&1 == 0 { + aApprox /= 2 + } else { + s, borrow := bits.Sub64(aApprox, bApprox, 0) + if borrow == 1 { + s = bApprox - aApprox + bApprox = aApprox + c0, c1 = c1, c0 + } + + aApprox = s / 2 + c0 = c0 - c1 + + // Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹ + // |f₁| ≤ 2ʲ still } - // s = s >> 1 - t2 = s[3] << 63 - s[3] >>= 1 - t = t2 - t2 = s[2] << 63 - s[2] = (s[2] >> 1) | t - t = t2 - t2 = s[1] << 63 - s[1] = (s[1] >> 1) | t - t = t2 - s[0] = (s[0] >> 1) | t + c1 *= 2 + // |f₁| ≤ 2ʲ⁺¹ + } + + s = a + var g0 int64 + // from this point on c0 aliases for f0 + c0, g0 = updateFactorsDecompose(c0) + aHi := a.linearCombNonModular(&s, c0, &b, g0) + if aHi&signBitSelector != 0 { + // if aHi < 0 + c0, g0 = -c0, -g0 + aHi = a.neg(&a, aHi) + } + // right-shift a by k-1 bits + a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) + a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) + a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) + a[3] = (a[3] >> approxLowBitsN) | (aHi << approxHighBitsN) + + var f1 int64 + // from this point on c1 aliases for g0 + f1, c1 = updateFactorsDecompose(c1) + bHi := b.linearCombNonModular(&s, f1, &b, c1) + if bHi&signBitSelector != 0 { + // if bHi < 0 + f1, c1 = -f1, -c1 + bHi = b.neg(&b, bHi) } - for u[0]&1 == 0 { - - // u = u >> 1 - t2 = u[3] << 63 - u[3] >>= 1 - t = t2 - t2 = u[2] << 63 - u[2] = (u[2] >> 1) | t - t = t2 - t2 = u[1] << 63 - u[1] = (u[1] >> 1) | t - t = t2 - u[0] = (u[0] 
>> 1) | t - - if r[0]&1 == 1 { - - // r = r + q - r[0], carry = bits.Add64(r[0], 4332616871279656263, 0) - r[1], carry = bits.Add64(r[1], 10917124144477883021, carry) - r[2], carry = bits.Add64(r[2], 13281191951274694749, carry) - r[3], _ = bits.Add64(r[3], 3486998266802970665, carry) + // right-shift b by k-1 bits + b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN) + b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) + b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) + b[3] = (b[3] >> approxLowBitsN) | (bHi << approxHighBitsN) + + if i&1 == 1 { + // Combine current update factors with previously stored ones + // [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₀] [pf₀, pg₀; pf₀, pg₀] + // We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1} + // Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹ + // Which leaves us with an extra bit for the sign + + // c0 aliases f0, c1 aliases g1 + c0, g0, f1, c1 = c0*pf0+g0*pf1, + c0*pg0+g0*pg1, + f1*pf0+c1*pf1, + f1*pg0+c1*pg1 + + s = u + u.linearCombSosSigned(&u, c0, &v, g0) + v.linearCombSosSigned(&s, f1, &v, c1) - } + } else { + // Save update factors + pf0, pg0, pf1, pg1 = c0, g0, f1, c1 + } + } + + // For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻² + const pSq int64 = 1 << (2 * (k - 1)) + // If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly) + for ; i < invIterationsN; i += 2 { + v.mulWSigned(&v, pSq) + } + + z.Mul(&v, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + }) + return z +} + +// approximate a big number x into a single 64 bit word using its uppermost and lowermost bits +// if x fits in a word as is, no approximation necessary +func approximate(x *Element, nBits int) uint64 { + + if nBits <= 64 { + return x[0] + } + + const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones + lo 
:= mask & x[0] + + hiWordIndex := (nBits - 1) / 64 + + hiWordBitsAvailable := nBits - hiWordIndex*64 + hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) + + mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) + hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) + + mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) + mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed + + return lo | mid | hi +} + +func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) { + hi := z.linearCombNonModular(x, xC, y, yC) + z.montReduceSigned(z, hi) +} + +// montReduceSigned SOS algorithm; xHi must be at most 63 bits long. Last bit of xHi may be used as a sign bit +func (z *Element) montReduceSigned(x *Element, xHi uint64) { + + const signBitRemover = ^signBitSelector + neg := xHi&signBitSelector != 0 + // the SOS implementation requires that most significant bit is 0 + // Let X be xHi*r + x + // note that if X is negative we would have initially stored it as 2⁶⁴ r + X + xHi &= signBitRemover + // with this a negative X is now represented as 2⁶³ r + X + + var t [2*Limbs - 1]uint64 + var C uint64 + + m := x[0] * qInvNegLsw - // r = r >> 1 - t2 = r[3] << 63 - r[3] >>= 1 - t = t2 - t2 = r[2] << 63 - r[2] = (r[2] >> 1) | t - t = t2 - t2 = r[1] << 63 - r[1] = (r[1] >> 1) | t - t = t2 - r[0] = (r[0] >> 1) | t + C = madd0(m, qElementWord0, x[0]) + C, t[1] = madd2(m, qElementWord1, x[1], C) + C, t[2] = madd2(m, qElementWord2, x[2], C) + C, t[3] = madd2(m, qElementWord3, x[3], C) + // the high word of m * qElement[3] is at most 62 bits + // x[3] + C is at most 65 bits (high word at most 1 bit) + // Thus the resulting C will be at most 63 bits + t[4] = xHi + C + // xHi and C are 63 bits, therefore no overflow + + { + const i = 1 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) 
+ + t[i+Limbs] += C + } + { + const i = 2 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + + t[i+Limbs] += C + } + { + const i = 3 + m := t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, z[0] = madd2(m, qElementWord1, t[i+1], C) + C, z[1] = madd2(m, qElementWord2, t[i+2], C) + z[3], z[2] = madd2(m, qElementWord3, t[i+3], C) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 10917124144477883021 || (z[1] == 10917124144477883021 && (z[0] < 4332616871279656263))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 4332616871279656263, 0) + z[1], b = bits.Sub64(z[1], 10917124144477883021, b) + z[2], b = bits.Sub64(z[2], 13281191951274694749, b) + z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) + } + if neg { + // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead + var b uint64 + z[0], b = bits.Sub64(z[0], signBitSelector, 0) + z[1], b = bits.Sub64(z[1], 0, b) + z[2], b = bits.Sub64(z[2], 0, b) + z[3], b = bits.Sub64(z[3], 0, b) + + // Occurs iff x == 0 && xHi < 0, i.e. 
X = rX' for -2⁶³ ≤ X' < 0 + if b != 0 { + // z[3] = -1 + // negative: add q + const neg1 = 0xFFFFFFFFFFFFFFFF + + b = 0 + z[0], b = bits.Add64(z[0], qElementWord0, b) + z[1], b = bits.Add64(z[1], qElementWord1, b) + z[2], b = bits.Add64(z[2], qElementWord2, b) + z[3], _ = bits.Add64(neg1, qElementWord3, b) } + } +} - // v >= u - bigger = !(v[3] < u[3] || (v[3] == u[3] && (v[2] < u[2] || (v[2] == u[2] && (v[1] < u[1] || (v[1] == u[1] && (v[0] < u[0]))))))) +// mulWSigned mul word signed (w/ montgomery reduction) +func (z *Element) mulWSigned(x *Element, y int64) { + m := y >> 63 + _mulWGeneric(z, x, uint64((y^m)-m)) + // multiply by abs(y) + if y < 0 { + z.Neg(z) + } +} - if bigger { +func (z *Element) neg(x *Element, xHi uint64) uint64 { + var b uint64 - // v = v - u - v[0], borrow = bits.Sub64(v[0], u[0], 0) - v[1], borrow = bits.Sub64(v[1], u[1], borrow) - v[2], borrow = bits.Sub64(v[2], u[2], borrow) - v[3], _ = bits.Sub64(v[3], u[3], borrow) + z[0], b = bits.Sub64(0, x[0], 0) + z[1], b = bits.Sub64(0, x[1], b) + z[2], b = bits.Sub64(0, x[2], b) + z[3], b = bits.Sub64(0, x[3], b) + xHi, _ = bits.Sub64(0, xHi, b) - // s = s - r - s[0], borrow = bits.Sub64(s[0], r[0], 0) - s[1], borrow = bits.Sub64(s[1], r[1], borrow) - s[2], borrow = bits.Sub64(s[2], r[2], borrow) - s[3], borrow = bits.Sub64(s[3], r[3], borrow) + return xHi +} - if borrow == 1 { +// regular multiplication by one word regular (non montgomery) +// Fewer additions than the branch-free for positive y. 
Could be faster on some architectures +func (z *Element) mulWRegular(x *Element, y int64) uint64 { - // s = s + q - s[0], carry = bits.Add64(s[0], 4332616871279656263, 0) - s[1], carry = bits.Add64(s[1], 10917124144477883021, carry) - s[2], carry = bits.Add64(s[2], 13281191951274694749, carry) - s[3], _ = bits.Add64(s[3], 3486998266802970665, carry) + // w := abs(y) + m := y >> 63 + w := uint64((y ^ m) - m) - } - } else { + var c uint64 + c, z[0] = bits.Mul64(x[0], w) + c, z[1] = madd1(x[1], w, c) + c, z[2] = madd1(x[2], w, c) + c, z[3] = madd1(x[3], w, c) - // u = u - v - u[0], borrow = bits.Sub64(u[0], v[0], 0) - u[1], borrow = bits.Sub64(u[1], v[1], borrow) - u[2], borrow = bits.Sub64(u[2], v[2], borrow) - u[3], _ = bits.Sub64(u[3], v[3], borrow) + if y < 0 { + c = z.neg(z, c) + } - // r = r - s - r[0], borrow = bits.Sub64(r[0], s[0], 0) - r[1], borrow = bits.Sub64(r[1], s[1], borrow) - r[2], borrow = bits.Sub64(r[2], s[2], borrow) - r[3], borrow = bits.Sub64(r[3], s[3], borrow) + return c +} - if borrow == 1 { +/* +Removed: seems slower +// mulWRegular branch-free regular multiplication by one word (non montgomery) +func (z *Element) mulWRegularBf(x *Element, y int64) uint64 { - // r = r + q - r[0], carry = bits.Add64(r[0], 4332616871279656263, 0) - r[1], carry = bits.Add64(r[1], 10917124144477883021, carry) - r[2], carry = bits.Add64(r[2], 13281191951274694749, carry) - r[3], _ = bits.Add64(r[3], 3486998266802970665, carry) + w := uint64(y) + allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w - } + // s[0], s[1] so results are not stored immediately in z. + // x[i] will be needed in the i+1 th iteration. 
We don't want to overwrite it in case x = z + var s [2]uint64 + var h [2]uint64 + + h[0], s[0] = bits.Mul64(x[0], w) + + c := uint64(0) + b := uint64(0) + + { + const curI = 1 % 2 + const prevI = 1 - curI + const iMinusOne = 1 - 1 + + h[curI], s[curI] = bits.Mul64(x[1], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] } - if (u[0] == 1) && (u[3]|u[2]|u[1]) == 0 { - return z.Set(&r) + + { + const curI = 2 % 2 + const prevI = 1 - curI + const iMinusOne = 2 - 1 + + h[curI], s[curI] = bits.Mul64(x[2], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] } - if (v[0] == 1) && (v[3]|v[2]|v[1]) == 0 { - return z.Set(&s) + + { + const curI = 3 % 2 + const prevI = 1 - curI + const iMinusOne = 3 - 1 + + h[curI], s[curI] = bits.Mul64(x[3], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] } + { + const curI = 4 % 2 + const prevI = 1 - curI + const iMinusOne = 3 + + s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + + return s[curI] + c } +}*/ + +// Requires NoCarry +func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { + var yTimes Element + + yHi := yTimes.mulWRegular(y, yC) + xHi := z.mulWRegular(x, xC) + + carry := uint64(0) + z[0], carry = bits.Add64(z[0], yTimes[0], carry) + z[1], carry = bits.Add64(z[1], yTimes[1], carry) + z[2], carry = bits.Add64(z[2], yTimes[2], carry) + z[3], carry = bits.Add64(z[3], yTimes[3], carry) + + yHi, _ = bits.Add64(xHi, yHi, carry) + return yHi } diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_exp.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_exp.go new file mode 100644 index 00000000000..315a40b9fb9 --- /dev/null +++ 
b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_exp.go @@ -0,0 +1,802 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// expBySqrtExp is equivalent to z.Exp(x, c19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b61f3f52) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _111 = 1 + _110 + // _1011 = _101 + _110 + // _1100 = 1 + _1011 + // _1101 = 1 + _1100 + // _1111 = _10 + _1101 + // _10001 = _10 + _1111 + // _10011 = _10 + _10001 + // _10111 = _110 + _10001 + // _11001 = _10 + _10111 + // _11011 = _10 + _11001 + // _11111 = _110 + _11001 + // _100011 = _1100 + _10111 + // _100111 = _1100 + _11011 + // _101001 = _10 + _100111 + // _101011 = _10 + _101001 + // _101101 = _10 + _101011 + // _111001 = _1100 + _101101 + // _1100000 = _100111 + _111001 + // i46 = ((_1100000 << 5 + _11001) << 9 + _100111) << 8 + // i62 = ((_111001 + i46) << 4 + _111) << 9 + _10011 + // i89 = ((i62 << 7 + _1101) << 13 + _101001) << 5 + // i109 = ((_10111 + i89) << 7 + _101) << 10 + _10001 + // i130 = ((i109 << 6 + _11011) << 5 + _1101) << 8 + // i154 = ((_11 + i130) << 12 + _101011) << 9 + _10111 + // i179 = ((i154 << 6 + _11001) << 5 + _1111) 
<< 12 + // i198 = ((_101101 + i179) << 7 + _101001) << 9 + _101101 + // i220 = ((i198 << 7 + _111) << 9 + _111001) << 4 + // i236 = ((_101 + i220) << 7 + _1101) << 6 + _1111 + // i265 = ((i236 << 5 + 1) << 11 + _100011) << 11 + // i281 = ((_101101 + i265) << 4 + _1011) << 9 + _11111 + // i299 = (i281 << 8 + _110 + _111001) << 7 + _101001 + // return 2*i299 + // + // Operations: 246 squares 54 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18 Element + // Step 1: t4 = x^0x2 + t4.Square(&x) + + // Step 2: t13 = x^0x3 + t13.Mul(&x, t4) + + // Step 3: t8 = x^0x5 + t8.Mul(t4, t13) + + // Step 4: t1 = x^0x6 + t1.Mul(&x, t8) + + // Step 5: t9 = x^0x7 + t9.Mul(&x, t1) + + // Step 6: t3 = x^0xb + t3.Mul(t8, t1) + + // Step 7: t0 = x^0xc + t0.Mul(&x, t3) + + // Step 8: t7 = x^0xd + t7.Mul(&x, t0) + + // Step 9: t6 = x^0xf + t6.Mul(t4, t7) + + // Step 10: t15 = x^0x11 + t15.Mul(t4, t6) + + // Step 11: t16 = x^0x13 + t16.Mul(t4, t15) + + // Step 12: t11 = x^0x17 + t11.Mul(t1, t15) + + // Step 13: t10 = x^0x19 + t10.Mul(t4, t11) + + // Step 14: t14 = x^0x1b + t14.Mul(t4, t10) + + // Step 15: t2 = x^0x1f + t2.Mul(t1, t10) + + // Step 16: t5 = x^0x23 + t5.Mul(t0, t11) + + // Step 17: t17 = x^0x27 + t17.Mul(t0, t14) + + // Step 18: z = x^0x29 + z.Mul(t4, t17) + + // Step 19: t12 = x^0x2b + t12.Mul(t4, z) + + // Step 20: t4 = x^0x2d + t4.Mul(t4, t12) + + // Step 21: t0 = x^0x39 + t0.Mul(t0, t4) + + // Step 22: t18 = x^0x60 + t18.Mul(t17, t0) + + // Step 27: t18 = x^0xc00 + for s := 0; s < 5; s++ { + t18.Square(t18) + } + + 
// Step 28: t18 = x^0xc19 + t18.Mul(t10, t18) + + // Step 37: t18 = x^0x183200 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 38: t17 = x^0x183227 + t17.Mul(t17, t18) + + // Step 46: t17 = x^0x18322700 + for s := 0; s < 8; s++ { + t17.Square(t17) + } + + // Step 47: t17 = x^0x18322739 + t17.Mul(t0, t17) + + // Step 51: t17 = x^0x183227390 + for s := 0; s < 4; s++ { + t17.Square(t17) + } + + // Step 52: t17 = x^0x183227397 + t17.Mul(t9, t17) + + // Step 61: t17 = x^0x30644e72e00 + for s := 0; s < 9; s++ { + t17.Square(t17) + } + + // Step 62: t16 = x^0x30644e72e13 + t16.Mul(t16, t17) + + // Step 69: t16 = x^0x1832273970980 + for s := 0; s < 7; s++ { + t16.Square(t16) + } + + // Step 70: t16 = x^0x183227397098d + t16.Mul(t7, t16) + + // Step 83: t16 = x^0x30644e72e131a000 + for s := 0; s < 13; s++ { + t16.Square(t16) + } + + // Step 84: t16 = x^0x30644e72e131a029 + t16.Mul(z, t16) + + // Step 89: t16 = x^0x60c89ce5c26340520 + for s := 0; s < 5; s++ { + t16.Square(t16) + } + + // Step 90: t16 = x^0x60c89ce5c26340537 + t16.Mul(t11, t16) + + // Step 97: t16 = x^0x30644e72e131a029b80 + for s := 0; s < 7; s++ { + t16.Square(t16) + } + + // Step 98: t16 = x^0x30644e72e131a029b85 + t16.Mul(t8, t16) + + // Step 108: t16 = x^0xc19139cb84c680a6e1400 + for s := 0; s < 10; s++ { + t16.Square(t16) + } + + // Step 109: t15 = x^0xc19139cb84c680a6e1411 + t15.Mul(t15, t16) + + // Step 115: t15 = x^0x30644e72e131a029b850440 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 116: t14 = x^0x30644e72e131a029b85045b + t14.Mul(t14, t15) + + // Step 121: t14 = x^0x60c89ce5c263405370a08b60 + for s := 0; s < 5; s++ { + t14.Square(t14) + } + + // Step 122: t14 = x^0x60c89ce5c263405370a08b6d + t14.Mul(t7, t14) + + // Step 130: t14 = x^0x60c89ce5c263405370a08b6d00 + for s := 0; s < 8; s++ { + t14.Square(t14) + } + + // Step 131: t13 = x^0x60c89ce5c263405370a08b6d03 + t13.Mul(t13, t14) + + // Step 143: t13 = x^0x60c89ce5c263405370a08b6d03000 + for s := 0; s < 12; s++ { + 
t13.Square(t13) + } + + // Step 144: t12 = x^0x60c89ce5c263405370a08b6d0302b + t12.Mul(t12, t13) + + // Step 153: t12 = x^0xc19139cb84c680a6e14116da0605600 + for s := 0; s < 9; s++ { + t12.Square(t12) + } + + // Step 154: t11 = x^0xc19139cb84c680a6e14116da0605617 + t11.Mul(t11, t12) + + // Step 160: t11 = x^0x30644e72e131a029b85045b68181585c0 + for s := 0; s < 6; s++ { + t11.Square(t11) + } + + // Step 161: t10 = x^0x30644e72e131a029b85045b68181585d9 + t10.Mul(t10, t11) + + // Step 166: t10 = x^0x60c89ce5c263405370a08b6d0302b0bb20 + for s := 0; s < 5; s++ { + t10.Square(t10) + } + + // Step 167: t10 = x^0x60c89ce5c263405370a08b6d0302b0bb2f + t10.Mul(t6, t10) + + // Step 179: t10 = x^0x60c89ce5c263405370a08b6d0302b0bb2f000 + for s := 0; s < 12; s++ { + t10.Square(t10) + } + + // Step 180: t10 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d + t10.Mul(t4, t10) + + // Step 187: t10 = x^0x30644e72e131a029b85045b68181585d9781680 + for s := 0; s < 7; s++ { + t10.Square(t10) + } + + // Step 188: t10 = x^0x30644e72e131a029b85045b68181585d97816a9 + t10.Mul(z, t10) + + // Step 197: t10 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d5200 + for s := 0; s < 9; s++ { + t10.Square(t10) + } + + // Step 198: t10 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d + t10.Mul(t4, t10) + + // Step 205: t10 = x^0x30644e72e131a029b85045b68181585d97816a91680 + for s := 0; s < 7; s++ { + t10.Square(t10) + } + + // Step 206: t9 = x^0x30644e72e131a029b85045b68181585d97816a91687 + t9.Mul(t9, t10) + + // Step 215: t9 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e00 + for s := 0; s < 9; s++ { + t9.Square(t9) + } + + // Step 216: t9 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e39 + t9.Mul(t0, t9) + + // Step 220: t9 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e390 + for s := 0; s < 4; s++ { + t9.Square(t9) + } + + // Step 221: t8 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e395 + t8.Mul(t8, t9) + + // Step 228: t8 = x^0x30644e72e131a029b85045b68181585d97816a916871ca80 + for s := 0; s < 7; s++ 
{ + t8.Square(t8) + } + + // Step 229: t7 = x^0x30644e72e131a029b85045b68181585d97816a916871ca8d + t7.Mul(t7, t8) + + // Step 235: t7 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a340 + for s := 0; s < 6; s++ { + t7.Square(t7) + } + + // Step 236: t6 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f + t6.Mul(t6, t7) + + // Step 241: t6 = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e0 + for s := 0; s < 5; s++ { + t6.Square(t6) + } + + // Step 242: t6 = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e1 + t6.Mul(&x, t6) + + // Step 253: t6 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f0800 + for s := 0; s < 11; s++ { + t6.Square(t6) + } + + // Step 254: t5 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f0823 + t5.Mul(t5, t6) + + // Step 265: t5 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e3951a78411800 + for s := 0; s < 11; s++ { + t5.Square(t5) + } + + // Step 266: t4 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e3951a7841182d + t4.Mul(t4, t5) + + // Step 270: t4 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e3951a7841182d0 + for s := 0; s < 4; s++ { + t4.Square(t4) + } + + // Step 271: t3 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e3951a7841182db + t3.Mul(t3, t4) + + // Step 280: t3 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b600 + for s := 0; s < 9; s++ { + t3.Square(t3) + } + + // Step 281: t2 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b61f + t2.Mul(t2, t3) + + // Step 289: t2 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b61f00 + for s := 0; s < 8; s++ { + t2.Square(t2) + } + + // Step 290: t1 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b61f06 + t1.Mul(t1, t2) + + // Step 291: t0 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b61f3f + t0.Mul(t0, t1) + + // Step 298: t0 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e3951a7841182db0f9f80 + for s := 0; s < 7; s++ { + t0.Square(t0) + } + + // Step 299: z = 
x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e3951a7841182db0f9fa9 + z.Mul(z, t0) + + // Step 300: z = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b61f3f52 + z.Square(z) + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea3) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _1000 = _10 + _110 + // _1101 = _101 + _1000 + // _10010 = _101 + _1101 + // _10011 = 1 + _10010 + // _10100 = 1 + _10011 + // _10111 = _11 + _10100 + // _11100 = _101 + _10111 + // _100000 = _1101 + _10011 + // _100011 = _11 + _100000 + // _101011 = _1000 + _100011 + // _101111 = _10011 + _11100 + // _1000001 = _10010 + _101111 + // _1010011 = _10010 + _1000001 + // _1011011 = _1000 + _1010011 + // _1100001 = _110 + _1011011 + // _1110101 = _10100 + _1100001 + // _10010001 = _11100 + _1110101 + // _10010101 = _100000 + _1110101 + // _10110101 = _100000 + _10010101 + // _10111011 = _110 + _10110101 + // _11000001 = _110 + _10111011 + // _11000011 = _10 + _11000001 + // _11010011 = _10010 + _11000001 + // _11100001 = _100000 + _11000001 + // _11100011 = _10 + _11100001 + // _11100111 = _110 + _11100001 + // i57 = ((_11000001 << 8 + _10010001) << 10 + _11100111) << 7 + // i76 = ((_10111 + i57) << 9 + _10011) << 7 + _1101 + // i109 = ((i76 << 14 + _1010011) << 9 + _11100001) << 8 + // i127 = ((_1000001 + i109) << 10 + _1011011) << 5 + _1101 + // i161 = ((i127 << 8 + _11) << 12 + _101011) << 12 + // i186 = ((_10111011 + i161) << 8 + _101111) << 14 + _10110101 + // i214 = ((i186 << 9 + _10010001) << 5 + _1101) << 12 + // i236 = ((_11100011 + i214) << 8 + _10010101) << 11 + _11010011 + // i268 = ((i236 << 7 + _1100001) << 11 + _100011) << 12 + // i288 = ((_1011011 + i268) << 9 + _11000011) << 8 + _11100111 + 
// return (i288 << 7 + _1110101) << 5 + _11 + // + // Operations: 246 squares 56 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + t19 = new(Element) + t20 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20 Element + // Step 1: t8 = x^0x2 + t8.Square(&x) + + // Step 2: z = x^0x3 + z.Mul(&x, t8) + + // Step 3: t2 = x^0x5 + t2.Mul(t8, z) + + // Step 4: t1 = x^0x6 + t1.Mul(&x, t2) + + // Step 5: t3 = x^0x8 + t3.Mul(t8, t1) + + // Step 6: t9 = x^0xd + t9.Mul(t2, t3) + + // Step 7: t6 = x^0x12 + t6.Mul(t2, t9) + + // Step 8: t18 = x^0x13 + t18.Mul(&x, t6) + + // Step 9: t0 = x^0x14 + t0.Mul(&x, t18) + + // Step 10: t19 = x^0x17 + t19.Mul(z, t0) + + // Step 11: t2 = x^0x1c + t2.Mul(t2, t19) + + // Step 12: t16 = x^0x20 + t16.Mul(t9, t18) + + // Step 13: t4 = x^0x23 + t4.Mul(z, t16) + + // Step 14: t14 = x^0x2b + t14.Mul(t3, t4) + + // Step 15: t12 = x^0x2f + t12.Mul(t18, t2) + + // Step 16: t15 = x^0x41 + t15.Mul(t6, t12) + + // Step 17: t17 = x^0x53 + t17.Mul(t6, t15) + + // Step 18: t3 = x^0x5b + t3.Mul(t3, t17) + + // Step 19: t5 = x^0x61 + t5.Mul(t1, t3) + + // Step 20: t0 = x^0x75 + t0.Mul(t0, t5) + + // Step 21: t10 = x^0x91 + t10.Mul(t2, t0) + + // Step 22: t7 = x^0x95 + t7.Mul(t16, t0) + + // Step 23: t11 = x^0xb5 + t11.Mul(t16, t7) + + // Step 24: t13 = x^0xbb + t13.Mul(t1, t11) + + // Step 25: t20 = x^0xc1 + t20.Mul(t1, t13) + + // Step 26: t2 = x^0xc3 + t2.Mul(t8, t20) + + // Step 27: t6 = x^0xd3 + t6.Mul(t6, t20) + + // Step 28: t16 = x^0xe1 + t16.Mul(t16, t20) + + // Step 29: t8 = x^0xe3 + t8.Mul(t8, t16) + + // Step 30: t1 
= x^0xe7 + t1.Mul(t1, t16) + + // Step 38: t20 = x^0xc100 + for s := 0; s < 8; s++ { + t20.Square(t20) + } + + // Step 39: t20 = x^0xc191 + t20.Mul(t10, t20) + + // Step 49: t20 = x^0x3064400 + for s := 0; s < 10; s++ { + t20.Square(t20) + } + + // Step 50: t20 = x^0x30644e7 + t20.Mul(t1, t20) + + // Step 57: t20 = x^0x183227380 + for s := 0; s < 7; s++ { + t20.Square(t20) + } + + // Step 58: t19 = x^0x183227397 + t19.Mul(t19, t20) + + // Step 67: t19 = x^0x30644e72e00 + for s := 0; s < 9; s++ { + t19.Square(t19) + } + + // Step 68: t18 = x^0x30644e72e13 + t18.Mul(t18, t19) + + // Step 75: t18 = x^0x1832273970980 + for s := 0; s < 7; s++ { + t18.Square(t18) + } + + // Step 76: t18 = x^0x183227397098d + t18.Mul(t9, t18) + + // Step 90: t18 = x^0x60c89ce5c2634000 + for s := 0; s < 14; s++ { + t18.Square(t18) + } + + // Step 91: t17 = x^0x60c89ce5c2634053 + t17.Mul(t17, t18) + + // Step 100: t17 = x^0xc19139cb84c680a600 + for s := 0; s < 9; s++ { + t17.Square(t17) + } + + // Step 101: t16 = x^0xc19139cb84c680a6e1 + t16.Mul(t16, t17) + + // Step 109: t16 = x^0xc19139cb84c680a6e100 + for s := 0; s < 8; s++ { + t16.Square(t16) + } + + // Step 110: t15 = x^0xc19139cb84c680a6e141 + t15.Mul(t15, t16) + + // Step 120: t15 = x^0x30644e72e131a029b850400 + for s := 0; s < 10; s++ { + t15.Square(t15) + } + + // Step 121: t15 = x^0x30644e72e131a029b85045b + t15.Mul(t3, t15) + + // Step 126: t15 = x^0x60c89ce5c263405370a08b60 + for s := 0; s < 5; s++ { + t15.Square(t15) + } + + // Step 127: t15 = x^0x60c89ce5c263405370a08b6d + t15.Mul(t9, t15) + + // Step 135: t15 = x^0x60c89ce5c263405370a08b6d00 + for s := 0; s < 8; s++ { + t15.Square(t15) + } + + // Step 136: t15 = x^0x60c89ce5c263405370a08b6d03 + t15.Mul(z, t15) + + // Step 148: t15 = x^0x60c89ce5c263405370a08b6d03000 + for s := 0; s < 12; s++ { + t15.Square(t15) + } + + // Step 149: t14 = x^0x60c89ce5c263405370a08b6d0302b + t14.Mul(t14, t15) + + // Step 161: t14 = x^0x60c89ce5c263405370a08b6d0302b000 + for s := 0; s < 12; s++ 
{ + t14.Square(t14) + } + + // Step 162: t13 = x^0x60c89ce5c263405370a08b6d0302b0bb + t13.Mul(t13, t14) + + // Step 170: t13 = x^0x60c89ce5c263405370a08b6d0302b0bb00 + for s := 0; s < 8; s++ { + t13.Square(t13) + } + + // Step 171: t12 = x^0x60c89ce5c263405370a08b6d0302b0bb2f + t12.Mul(t12, t13) + + // Step 185: t12 = x^0x183227397098d014dc2822db40c0ac2ecbc000 + for s := 0; s < 14; s++ { + t12.Square(t12) + } + + // Step 186: t11 = x^0x183227397098d014dc2822db40c0ac2ecbc0b5 + t11.Mul(t11, t12) + + // Step 195: t11 = x^0x30644e72e131a029b85045b68181585d97816a00 + for s := 0; s < 9; s++ { + t11.Square(t11) + } + + // Step 196: t10 = x^0x30644e72e131a029b85045b68181585d97816a91 + t10.Mul(t10, t11) + + // Step 201: t10 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d5220 + for s := 0; s < 5; s++ { + t10.Square(t10) + } + + // Step 202: t9 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d + t9.Mul(t9, t10) + + // Step 214: t9 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d000 + for s := 0; s < 12; s++ { + t9.Square(t9) + } + + // Step 215: t8 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e3 + t8.Mul(t8, t9) + + // Step 223: t8 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e300 + for s := 0; s < 8; s++ { + t8.Square(t8) + } + + // Step 224: t7 = x^0x60c89ce5c263405370a08b6d0302b0bb2f02d522d0e395 + t7.Mul(t7, t8) + + // Step 235: t7 = x^0x30644e72e131a029b85045b68181585d97816a916871ca800 + for s := 0; s < 11; s++ { + t7.Square(t7) + } + + // Step 236: t6 = x^0x30644e72e131a029b85045b68181585d97816a916871ca8d3 + t6.Mul(t6, t7) + + // Step 243: t6 = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e546980 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 244: t5 = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e1 + t5.Mul(t5, t6) + + // Step 255: t5 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f0800 + for s := 0; s < 11; s++ { + t5.Square(t5) + } + + // Step 256: t4 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f0823 + t4.Mul(t4, t5) + + // Step 
268: t4 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f0823000 + for s := 0; s < 12; s++ { + t4.Square(t4) + } + + // Step 269: t3 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b + t3.Mul(t3, t4) + + // Step 278: t3 = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b600 + for s := 0; s < 9; s++ { + t3.Square(t3) + } + + // Step 279: t2 = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3 + t2.Mul(t2, t3) + + // Step 287: t2 = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c300 + for s := 0; s < 8; s++ { + t2.Square(t2) + } + + // Step 288: t1 = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7 + t1.Mul(t1, t2) + + // Step 295: t1 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b61f380 + for s := 0; s < 7; s++ { + t1.Square(t1) + } + + // Step 296: t0 = x^0xc19139cb84c680a6e14116da060561765e05aa45a1c72a34f082305b61f3f5 + t0.Mul(t0, t1) + + // Step 301: t0 = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea0 + for s := 0; s < 5; s++ { + t0.Square(t0) + } + + // Step 302: z = x^0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea3 + z.Mul(z, t0) + + return z +} diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_fuzz.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_fuzz.go new file mode 100644 index 00000000000..d18bd4bb40c --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_fuzz.go @@ -0,0 +1,136 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "bytes" + "encoding/binary" + "io" + "math/big" + "math/bits" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +// Fuzz arithmetic operations fuzzer +func Fuzz(data []byte) int { + r := bytes.NewReader(data) + + var e1, e2 Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + // mul assembly + + var c, _c Element + a, _a, b, _b := e1, e1, e2, e2 + c.Mul(&a, &b) + _mulGeneric(&_c, &_a, &_b) + + if !c.Equal(&_c) { + panic("mul asm != mul generic on Element") + } + } + + { + // inverse + inv := e1 + inv.Inverse(&inv) + + var bInv, b1, b2 big.Int + e1.ToBigIntRegular(&b1) + bInv.ModInverse(&b1, Modulus()) + inv.ToBigIntRegular(&b2) + + if b2.Cmp(&bInv) != 0 { + panic("inverse operation doesn't match big int result") + } + } + + { + // a + -a == 0 + a, b := e1, e1 + b.Neg(&b) + a.Add(&a, &b) + if !a.IsZero() { + panic("a + -a != 0") + } + } + + return fuzzNormal + +} + +// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader +// and interpret it as big endian uint64 +// used for fuzzing purposes only +func (z *Element) SetRawBytes(r io.Reader) { + + buf := make([]byte, 8) + + for i := 0; i < len(z); i++ { + if _, err := io.ReadFull(r, buf); err != nil { + goto eof + } + z[i] = binary.BigEndian.Uint64(buf[:]) + } +eof: + z[3] %= qElement[3] + + if z.BiggerModulus() { + var b uint64 + z[0], b = bits.Sub64(z[0], qElement[0], 0) + z[1], b = bits.Sub64(z[1], qElement[1], b) + z[2], b = bits.Sub64(z[2], qElement[2], b) 
+ z[3], b = bits.Sub64(z[3], qElement[3], b) + } + + return +} + +func (z *Element) BiggerModulus() bool { + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_mul_adx_amd64.s b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_mul_adx_amd64.s index 2465dbb936b..65c040e36b6 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_mul_adx_amd64.s +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_mul_adx_amd64.s @@ -46,7 +46,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), NOSPLIT, $0-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -72,7 +72,7 @@ TEXT ·mul(SB), NOSPLIT, $0-24 // A -> BP // t[0] -> R14 - // t[1] -> R15 + // t[1] -> R13 // t[2] -> CX // t[3] -> BX // clear the flags @@ -80,11 +80,11 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ 0(R11), DX // (A,t[0]) := x[0]*y[0] + A - MULXQ DI, R14, R15 + MULXQ DI, R14, R13 // (A,t[1]) := x[1]*y[0] + A MULXQ R8, AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := x[2]*y[0] + A MULXQ R9, AX, BX @@ -111,14 +111,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -139,9 +139,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + 
x[1]*y[1] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[1] + A ADCXQ BP, CX @@ -171,14 +171,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -199,9 +199,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[2] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[2] + A ADCXQ BP, CX @@ -231,14 +231,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -259,9 +259,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[3] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[3] + A ADCXQ BP, CX @@ -291,14 +291,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -310,12 +310,12 @@ TEXT ·mul(SB), NOSPLIT, $0-24 ADCXQ AX, BX ADOXQ BP, BX - // reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11) - REDUCE(R14,R15,CX,BX,R13,SI,R12,R11) + // reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI) + 
REDUCE(R14,R13,CX,BX,SI,R12,R11,DI) MOVQ res+0(FP), AX MOVQ R14, 0(AX) - MOVQ R15, 8(AX) + MOVQ R13, 8(AX) MOVQ CX, 16(AX) MOVQ BX, 24(AX) RET @@ -323,7 +323,7 @@ TEXT ·mul(SB), NOSPLIT, $0-24 TEXT ·fromMont(SB), NOSPLIT, $0-8 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] @@ -335,7 +335,7 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 // t[N-1] = C MOVQ res+0(FP), DX MOVQ 0(DX), R14 - MOVQ 8(DX), R15 + MOVQ 8(DX), R13 MOVQ 16(DX), CX MOVQ 24(DX), BX XORQ DX, DX @@ -351,14 +351,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -380,14 +380,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -409,14 +409,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -438,14 +438,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ 
q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -455,12 +455,12 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 ADCXQ AX, BX ADOXQ AX, BX - // reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9) - REDUCE(R14,R15,CX,BX,SI,DI,R8,R9) + // reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9) + REDUCE(R14,R13,CX,BX,SI,DI,R8,R9) MOVQ res+0(FP), AX MOVQ R14, 0(AX) - MOVQ R15, 8(AX) + MOVQ R13, 8(AX) MOVQ CX, 16(AX) MOVQ BX, 24(AX) RET diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_mul_amd64.s b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_mul_amd64.s index bd64044052f..54af6c35873 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_mul_amd64.s +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_mul_amd64.s @@ -46,7 +46,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), $24-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -75,7 +75,7 @@ TEXT ·mul(SB), $24-24 // A -> BP // t[0] -> R14 - // t[1] -> R15 + // t[1] -> R13 // t[2] -> CX // t[3] -> BX // clear the flags @@ -83,11 +83,11 @@ TEXT ·mul(SB), $24-24 MOVQ 0(R11), DX // (A,t[0]) := x[0]*y[0] + A - MULXQ DI, R14, R15 + MULXQ DI, R14, R13 // (A,t[1]) := x[1]*y[0] + A MULXQ R8, AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := x[2]*y[0] + A MULXQ R9, AX, BX @@ -114,14 +114,14 @@ TEXT ·mul(SB), $24-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -142,9 +142,9 @@ TEXT ·mul(SB), $24-24 ADOXQ 
AX, R14 // (A,t[1]) := t[1] + x[1]*y[1] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[1] + A ADCXQ BP, CX @@ -174,14 +174,14 @@ TEXT ·mul(SB), $24-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -202,9 +202,9 @@ TEXT ·mul(SB), $24-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[2] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[2] + A ADCXQ BP, CX @@ -234,14 +234,14 @@ TEXT ·mul(SB), $24-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -262,9 +262,9 @@ TEXT ·mul(SB), $24-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[3] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[3] + A ADCXQ BP, CX @@ -294,14 +294,14 @@ TEXT ·mul(SB), $24-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -313,12 +313,12 @@ TEXT ·mul(SB), $24-24 ADCXQ AX, BX ADOXQ BP, BX - // reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11) - REDUCE(R14,R15,CX,BX,R13,SI,R12,R11) + // reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI) + REDUCE(R14,R13,CX,BX,SI,R12,R11,DI) MOVQ 
res+0(FP), AX MOVQ R14, 0(AX) - MOVQ R15, 8(AX) + MOVQ R13, 8(AX) MOVQ CX, 16(AX) MOVQ BX, 24(AX) RET @@ -337,7 +337,7 @@ TEXT ·fromMont(SB), $8-8 NO_LOCAL_POINTERS // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] @@ -351,7 +351,7 @@ TEXT ·fromMont(SB), $8-8 JNE l2 MOVQ res+0(FP), DX MOVQ 0(DX), R14 - MOVQ 8(DX), R15 + MOVQ 8(DX), R13 MOVQ 16(DX), CX MOVQ 24(DX), BX XORQ DX, DX @@ -367,14 +367,14 @@ TEXT ·fromMont(SB), $8-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -396,14 +396,14 @@ TEXT ·fromMont(SB), $8-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -425,14 +425,14 @@ TEXT ·fromMont(SB), $8-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -454,14 +454,14 @@ TEXT ·fromMont(SB), $8-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -471,12 
+471,12 @@ TEXT ·fromMont(SB), $8-8 ADCXQ AX, BX ADOXQ AX, BX - // reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9) - REDUCE(R14,R15,CX,BX,SI,DI,R8,R9) + // reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9) + REDUCE(R14,R13,CX,BX,SI,DI,R8,R9) MOVQ res+0(FP), AX MOVQ R14, 0(AX) - MOVQ R15, 8(AX) + MOVQ R13, 8(AX) MOVQ CX, 16(AX) MOVQ BX, 24(AX) RET diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_amd64.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_amd64.go index 71b26855b49..73a3711ec07 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_amd64.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_amd64.go @@ -22,6 +22,9 @@ func MulBy3(x *Element) //go:noescape func MulBy5(x *Element) +//go:noescape +func MulBy13(x *Element) + //go:noescape func add(res, x, y *Element) @@ -42,3 +45,6 @@ func fromMont(res *Element) //go:noescape func reduce(res *Element) + +//go:noescape +func Butterfly(a, b *Element) diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_amd64.s b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_amd64.s index 833c807ece8..5b8b1f0e76b 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_amd64.s +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_amd64.s @@ -233,3 +233,108 @@ TEXT ·MulBy5(SB), NOSPLIT, $0-8 MOVQ BX, 16(AX) MOVQ SI, 24(AX) RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) + + MOVQ DX, R11 + 
MOVQ CX, R12 + MOVQ BX, R13 + MOVQ SI, R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ R11, DX + ADCQ R12, CX + ADCQ R13, BX + ADCQ R14, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), NOSPLIT, $0-16 + MOVQ a+0(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ CX, R8 + MOVQ BX, R9 + MOVQ SI, R10 + MOVQ DI, R11 + XORQ AX, AX + MOVQ b+8(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + SUBQ 0(DX), R8 + SBBQ 8(DX), R9 + SBBQ 16(DX), R10 + SBBQ 24(DX), R11 + MOVQ $0x3c208c16d87cfd47, R12 + MOVQ $0x97816a916871ca8d, R13 + MOVQ $0xb85045b68181585d, R14 + MOVQ $0x30644e72e131a029, R15 + CMOVQCC AX, R12 + CMOVQCC AX, R13 + CMOVQCC AX, R14 + CMOVQCC AX, R15 + ADDQ R12, R8 + ADCQ R13, R9 + ADCQ R14, R10 + ADCQ R15, R11 + MOVQ R8, 0(DX) + MOVQ R9, 8(DX) + MOVQ R10, 16(DX) + MOVQ R11, 24(DX) + + // reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11) + + MOVQ a+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + RET diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_noasm.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_noasm.go index e6ced1bf566..fec62891833 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_noasm.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fp/element_ops_noasm.go @@ -1,3 +1,4 @@ +//go:build !amd64 // +build 
!amd64 // Copyright 2020 ConsenSys Software Inc. @@ -34,6 +35,18 @@ func MulBy5(x *Element) { mulByConstant(x, 5) } +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + func mul(z, x, y *Element) { _mulGeneric(z, x, y) } diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/asm.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/asm.go index f859dd8731d..8241357c459 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/asm.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/asm.go @@ -1,3 +1,4 @@ +//go:build !noadx // +build !noadx // Copyright 2020 ConsenSys Software Inc. diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/asm_noadx.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/asm_noadx.go index ab9b869b5b4..221beab937e 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/asm_noadx.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/asm_noadx.go @@ -1,3 +1,4 @@ +//go:build noadx // +build noadx // Copyright 2020 ConsenSys Software Inc. diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/doc.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/doc.go new file mode 100644 index 00000000000..e1dea9c74e4 --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fr contains field arithmetic operations for modulus = 0x30644e...000001. +// +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@gnark/modular_multiplication) +// +// The modulus is hardcoded in all the operations. +// +// Field elements are represented as an array, and assumed to be in Montgomery form in all methods: +// type Element [4]uint64 +// +// Example API signature +// // Mul z = x * y mod q +// func (z *Element) Mul(x, y *Element) *Element +// +// and can be used like so: +// var a, b Element +// a.SetUint64(2) +// b.SetString("984896738") +// a.Mul(a, b) +// a.Sub(a, a) +// .Add(a, b) +// .Inv(a) +// b.Exp(b, new(big.Int).SetUint64(42)) +// +// Modulus +// 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001 // base 16 +// 21888242871839275222246405745257275088548364400416034343698204186575808495617 // base 10 +package fr diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element.go index 3759baa5848..b323c190684 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element.go @@ -14,7 +14,6 @@ // Code generated by consensys/gnark-crypto DO NOT EDIT -// Package fr contains field arithmetic operations for modulus 21888242871839275222246405745257275088548364400416034343698204186575808495617 package fr // /!\ WARNING /!\ @@ -26,10 +25,13 @@ package fr import ( "crypto/rand" "encoding/binary" + "errors" "io" "math/big" "math/bits" + "reflect" "strconv" + "strings" "sync" ) @@ -61,13 +63,21 @@ func Modulus() *big.Int { } // q (modulus) +const qElementWord0 uint64 = 4891460686036598785 
+const qElementWord1 uint64 = 2896914383306846353 +const qElementWord2 uint64 = 13281191951274694749 +const qElementWord3 uint64 = 3486998266802970665 + var qElement = Element{ - 4891460686036598785, - 2896914383306846353, - 13281191951274694749, - 3486998266802970665, + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, } +// Used for Montgomery reduction. (qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r +const qInvNegLsw uint64 = 14042775128853446655 + // rSquare var rSquare = Element{ 1997599621687373223, @@ -86,12 +96,39 @@ func init() { _modulus.SetString("21888242871839275222246405745257275088548364400416034343698204186575808495617", 10) } -// SetUint64 z = v, sets z LSB to v (non-Montgomery form) and convert z to Montgomery form +// NewElement returns a new Element from a uint64 value +// +// it is equivalent to +// var v NewElement +// v.SetUint64(...) +func NewElement(v uint64) Element { + z := Element{v} + z.Mul(&z, &rSquare) + return z +} + +// SetUint64 sets z to v and returns z func (z *Element) SetUint64(v uint64) *Element { + // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form *z = Element{v} return z.Mul(z, &rSquare) // z.ToMont() } +// SetInt64 sets z to v and returns z +func (z *Element) SetInt64(v int64) *Element { + + // absolute value of v + m := v >> 63 + z.SetUint64(uint64((v ^ m) - m)) + + if m != 0 { + // v is negative + z.Neg(z) + } + + return z +} + // Set z = x func (z *Element) Set(x *Element) *Element { z[0] = x[0] @@ -101,28 +138,46 @@ func (z *Element) Set(x *Element) *Element { return z } -// SetInterface converts i1 from uint64, int, string, or Element, big.Int into Element -// panic if provided type is not supported -func (z *Element) SetInterface(i1 interface{}) *Element { +// SetInterface converts provided interface into Element +// returns an error if provided type is not supported +// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer), +// *big.Int, 
big.Int, []byte +func (z *Element) SetInterface(i1 interface{}) (*Element, error) { switch c1 := i1.(type) { case Element: - return z.Set(&c1) + return z.Set(&c1), nil case *Element: - return z.Set(c1) + return z.Set(c1), nil + case uint8: + return z.SetUint64(uint64(c1)), nil + case uint16: + return z.SetUint64(uint64(c1)), nil + case uint32: + return z.SetUint64(uint64(c1)), nil + case uint: + return z.SetUint64(uint64(c1)), nil case uint64: - return z.SetUint64(c1) + return z.SetUint64(c1), nil + case int8: + return z.SetInt64(int64(c1)), nil + case int16: + return z.SetInt64(int64(c1)), nil + case int32: + return z.SetInt64(int64(c1)), nil + case int64: + return z.SetInt64(c1), nil case int: - return z.SetString(strconv.Itoa(c1)) + return z.SetInt64(int64(c1)), nil case string: - return z.SetString(c1) + return z.SetString(c1), nil case *big.Int: - return z.SetBigInt(c1) + return z.SetBigInt(c1), nil case big.Int: - return z.SetBigInt(&c1) + return z.SetBigInt(&c1), nil case []byte: - return z.SetBytes(c1) + return z.SetBytes(c1), nil default: - panic("invalid type") + return nil, errors.New("can't set fr.Element from type " + reflect.TypeOf(i1).String()) } } @@ -152,6 +207,16 @@ func (z *Element) Div(x, y *Element) *Element { return z } +// Bit returns the i'th bit, with lsb == bit 0. +// It is the responsability of the caller to convert from Montgomery to Regular form if needed +func (z *Element) Bit(i uint64) uint64 { + j := i / 64 + if j >= 4 { + return 0 + } + return uint64(z[j] >> (i % 64) & 1) +} + // Equal returns z == x func (z *Element) Equal(x *Element) bool { return (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0]) @@ -162,6 +227,11 @@ func (z *Element) IsZero() bool { return (z[3] | z[2] | z[1] | z[0]) == 0 } +// IsUint64 reports whether z can be represented as an uint64. 
+func (z *Element) IsUint64() bool { + return (z[3] | z[2] | z[1]) == 0 +} + // Cmp compares (lexicographic order) z and x and returns: // // -1 if z < x @@ -227,7 +297,7 @@ func (z *Element) SetRandom() (*Element, error) { z[3] = binary.BigEndian.Uint64(bytes[24:32]) z[3] %= 3486998266802970665 - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { var b uint64 @@ -247,35 +317,39 @@ func One() Element { return one } -// MulAssign is deprecated -// Deprecated: use Mul instead -func (z *Element) MulAssign(x *Element) *Element { - return z.Mul(z, x) -} +// Halve sets z to z / 2 (mod p) +func (z *Element) Halve() { + if z[0]&1 == 1 { + var carry uint64 -// AddAssign is deprecated -// Deprecated: use Add instead -func (z *Element) AddAssign(x *Element) *Element { - return z.Add(z, x) -} + // z = z + q + z[0], carry = bits.Add64(z[0], 4891460686036598785, 0) + z[1], carry = bits.Add64(z[1], 2896914383306846353, carry) + z[2], carry = bits.Add64(z[2], 13281191951274694749, carry) + z[3], _ = bits.Add64(z[3], 3486998266802970665, carry) + + } + + // z = z >> 1 + + z[0] = z[0]>>1 | z[1]<<63 + z[1] = z[1]>>1 | z[2]<<63 + z[2] = z[2]>>1 | z[3]<<63 + z[3] >>= 1 -// SubAssign is deprecated -// Deprecated: use Sub instead -func (z *Element) SubAssign(x *Element) *Element { - return z.Sub(z, x) } // API with assembly impl // Mul z = x * y mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Mul(x, y *Element) *Element { mul(z, x, y) return z } // Square z = x * x mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Square(x *Element) *Element { mul(z, x, x) 
return z @@ -371,7 +445,58 @@ func _mulGeneric(z, x, y *Element) { z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) } - // if z > q --> z -= q + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) + z[1], b = bits.Sub64(z[1], 2896914383306846353, b) + z[2], b = bits.Sub64(z[2], 13281191951274694749, b) + z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) + } +} + +func _mulWGeneric(z, x *Element, y uint64) { + + var t [4]uint64 + { + // round 0 + c1, c0 := bits.Mul64(y, x[0]) + m := c0 * 14042775128853446655 + c2 := madd0(m, 4891460686036598785, c0) + c1, c0 = madd1(y, x[1], c1) + c2, t[0] = madd2(m, 2896914383306846353, c2, c0) + c1, c0 = madd1(y, x[2], c1) + c2, t[1] = madd2(m, 13281191951274694749, c2, c0) + c1, c0 = madd1(y, x[3], c1) + t[3], t[2] = madd3(m, 3486998266802970665, c0, c2, c1) + } + { + // round 1 + m := t[0] * 14042775128853446655 + c2 := madd0(m, 4891460686036598785, t[0]) + c2, t[0] = madd2(m, 2896914383306846353, c2, t[1]) + c2, t[1] = madd2(m, 13281191951274694749, c2, t[2]) + t[3], t[2] = madd2(m, 3486998266802970665, t[3], c2) + } + { + // round 2 + m := t[0] * 14042775128853446655 + c2 := madd0(m, 4891460686036598785, t[0]) + c2, t[0] = madd2(m, 2896914383306846353, c2, t[1]) + c2, t[1] = madd2(m, 13281191951274694749, c2, t[2]) + t[3], t[2] = madd2(m, 3486998266802970665, t[3], c2) + } + { + // round 3 + m := t[0] * 14042775128853446655 + c2 := madd0(m, 4891460686036598785, t[0]) + c2, z[0] = madd2(m, 2896914383306846353, c2, t[1]) + c2, z[1] = madd2(m, 13281191951274694749, c2, t[2]) + z[3], z[2] = madd2(m, 3486998266802970665, t[3], c2) + } + + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 
3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { var b uint64 @@ -422,7 +547,7 @@ func _fromMontGeneric(z *Element) { z[3] = C } - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { var b uint64 @@ -441,7 +566,7 @@ func _addGeneric(z, x, y *Element) { z[2], carry = bits.Add64(x[2], y[2], carry) z[3], _ = bits.Add64(x[3], y[3], carry) - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { var b uint64 @@ -460,7 +585,7 @@ func _doubleGeneric(z, x *Element) { z[2], carry = bits.Add64(x[2], x[2], carry) z[3], _ = bits.Add64(x[3], x[3], carry) - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { var b uint64 @@ -500,7 +625,7 @@ func _negGeneric(z, x *Element) { func _reduceGeneric(z *Element) { - // if z > q --> z -= q + // if z > q → z -= q // note: this is NOT constant time if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { var b uint64 @@ -528,10 
+653,66 @@ func mulByConstant(z *Element, c uint8) { _z := *z z.Double(z).Double(z).Add(z, &_z) default: - panic("not implemented") + var y Element + y.SetUint64(uint64(c)) + z.Mul(z, &y) } } +// BatchInvert returns a new slice with every element inverted. +// Uses Montgomery batch inversion trick +func BatchInvert(a []Element) []Element { + res := make([]Element, len(a)) + if len(a) == 0 { + return res + } + + zeroes := make([]bool, len(a)) + accumulator := One() + + for i := 0; i < len(a); i++ { + if a[i].IsZero() { + zeroes[i] = true + continue + } + res[i] = accumulator + accumulator.Mul(&accumulator, &a[i]) + } + + accumulator.Inverse(&accumulator) + + for i := len(a) - 1; i >= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} + +func _butterflyGeneric(a, b *Element) { + t := *a + a.Add(a, b) + b.Sub(&t, b) +} + +// BitLen returns the minimum number of bits needed to represent z +// returns 0 if z == 0 +func (z *Element) BitLen() int { + if z[3] != 0 { + return 192 + bits.Len64(z[3]) + } + if z[2] != 0 { + return 128 + bits.Len64(z[2]) + } + if z[1] != 0 { + return 64 + bits.Len64(z[1]) + } + return bits.Len64(z[0]) +} + // Exp z = x^exponent mod q func (z *Element) Exp(x Element, exponent *big.Int) *Element { var bZero big.Int @@ -552,7 +733,7 @@ func (z *Element) Exp(x Element, exponent *big.Int) *Element { } // ToMont converts z to Montgomery form -// sets and returns z = z * r^2 +// sets and returns z = z * r² func (z *Element) ToMont() *Element { return z.Mul(z, &rSquare) } @@ -562,11 +743,41 @@ func (z Element) ToRegular() Element { return *z.FromMont() } -// String returns the string form of an Element in Montgomery form +// String returns the decimal representation of z as generated by +// z.Text(10). func (z *Element) String() string { + return z.Text(10) +} + +// Text returns the string representation of z in the given base. 
+// Base must be between 2 and 36, inclusive. The result uses the +// lower-case letters 'a' to 'z' for digit values 10 to 35. +// No prefix (such as "0x") is added to the string. If z is a nil +// pointer it returns "". +// If base == 10 and -z fits in a uint64 prefix "-" is added to the string. +func (z *Element) Text(base int) string { + if base < 2 || base > 36 { + panic("invalid base") + } + if z == nil { + return "" + } + zz := *z + zz.FromMont() + if zz.IsUint64() { + return strconv.FormatUint(zz[0], base) + } else if base == 10 { + var zzNeg Element + zzNeg.Neg(z) + zzNeg.FromMont() + if zzNeg.IsUint64() { + return "-" + strconv.FormatUint(zzNeg[0], base) + } + } vv := bigIntPool.Get().(*big.Int) - defer bigIntPool.Put(vv) - return z.ToBigIntRegular(vv).String() + r := zz.ToBigInt(vv).Text(base) + bigIntPool.Put(vv) + return r } // ToBigInt returns z as a big.Int in Montgomery form @@ -598,6 +809,13 @@ func (z *Element) Bytes() (res [Limbs * 8]byte) { return } +// Marshal returns the regular (non montgomery) value +// of z as a big-endian byte slice. +func (z *Element) Marshal() []byte { + b := z.Bytes() + return b[:] +} + // SetBytes interprets e as the bytes of a big-endian unsigned integer, // sets z to that value (in Montgomery form), and returns z. 
func (z *Element) SetBytes(e []byte) *Element { @@ -645,7 +863,7 @@ func (z *Element) SetBigInt(v *big.Int) *Element { return z } -// setBigInt assumes 0 <= v < q +// setBigInt assumes 0 ⩽ v < q func (z *Element) setBigInt(v *big.Int) *Element { vBits := v.Bits() @@ -666,14 +884,30 @@ func (z *Element) setBigInt(v *big.Int) *Element { return z.ToMont() } -// SetString creates a big.Int with s (in base 10) and calls SetBigInt on z -func (z *Element) SetString(s string) *Element { +// SetString creates a big.Int with number and calls SetBigInt on z +// +// The number prefix determines the actual base: A prefix of +// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8, +// and ''0x'' or ''0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For base 16, lower and upper case letters are considered the same: +// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. +// +// An underscore character ''_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as a panic if there +// are no other errors. 
+// +func (z *Element) SetString(number string) *Element { // get temporary big int from the pool vv := bigIntPool.Get().(*big.Int) - if _, ok := vv.SetString(s, 10); !ok { - panic("Element.SetString failed -> can't parse number in base10 into a big.Int") + if _, ok := vv.SetString(number, 0); !ok { + panic("Element.SetString failed -> can't parse number into a big.Int " + number) } + z.SetBigInt(vv) // release object into pool @@ -682,22 +916,59 @@ func (z *Element) SetString(s string) *Element { return z } -var ( - _bLegendreExponentElement *big.Int - _bSqrtExponentElement *big.Int -) +// MarshalJSON returns json encoding of z (z.Text(10)) +// If z == nil, returns null +func (z *Element) MarshalJSON() ([]byte, error) { + if z == nil { + return []byte("null"), nil + } + const maxSafeBound = 15 // we encode it as number if it's small + s := z.Text(10) + if len(s) <= maxSafeBound { + return []byte(s), nil + } + var sbb strings.Builder + sbb.WriteByte('"') + sbb.WriteString(s) + sbb.WriteByte('"') + return []byte(sbb.String()), nil +} -func init() { - _bLegendreExponentElement, _ = new(big.Int).SetString("183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f0fac9f8000000", 16) - const sqrtExponentElement = "183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f0fac9f" - _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) +// UnmarshalJSON accepts numbers and strings as input +// See Element.SetString for valid prefixes (0x, 0b, ...) 
+func (z *Element) UnmarshalJSON(data []byte) error { + s := string(data) + if len(s) > Bits*3 { + return errors.New("value too large (max = Element.Bits * 3)") + } + + // we accept numbers and strings, remove leading and trailing quotes if any + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(s, 0); !ok { + return errors.New("can't parse into a big.Int: " + s) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return nil } // Legendre returns the Legendre symbol of z (either +1, -1, or 0.) func (z *Element) Legendre() int { var l Element // z^((q-1)/2) - l.Exp(*z, _bLegendreExponentElement) + l.expByLegendreExp(*z) if l.IsZero() { return 0 @@ -720,7 +991,7 @@ func (z *Element) Sqrt(x *Element) *Element { var y, b, t, w Element // w = x^((s-1)/2)) - w.Exp(*x, _bSqrtExponentElement) + w.expBySqrtExp(*x) // y = x^((s+1)/2)) = w * x y.Mul(x, &w) @@ -778,170 +1049,418 @@ func (z *Element) Sqrt(x *Element) *Element { } } -// Inverse z = x^-1 mod q -// Algorithm 16 in "Efficient Software-Implementation of Finite Fields with Applications to Cryptography" -// if x == 0, sets and returns z = x -func (z *Element) Inverse(x *Element) *Element { - if x.IsZero() { - return z.Set(x) +func max(a int, b int) int { + if a > b { + return a } + return b +} - // initialize u = q - var u = Element{ - 4891460686036598785, - 2896914383306846353, - 13281191951274694749, - 3486998266802970665, +func min(a int, b int) int { + if a < b { + return a } + return b +} - // initialize s = r^2 - var s = Element{ - 1997599621687373223, - 6052339484930628067, - 10108755138030829701, - 150537098327114917, +const updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) +const updateFactorIdentityMatrixRow0 = 1 +const updateFactorIdentityMatrixRow1 = 1 << 32 + +func 
updateFactorsDecompose(c int64) (int64, int64) { + c += updateFactorsConversionBias + const low32BitsFilter int64 = 0xFFFFFFFF + f := c&low32BitsFilter - 0x7FFFFFFF + g := c>>32&low32BitsFilter - 0x7FFFFFFF + return f, g +} + +const k = 32 // word size / 2 +const signBitSelector = uint64(1) << 63 +const approxLowBitsN = k - 1 +const approxHighBitsN = k + 1 +const inversionCorrectionFactorWord0 = 13488105295233737379 +const inversionCorrectionFactorWord1 = 17373395488625725466 +const inversionCorrectionFactorWord2 = 6831692495576925776 +const inversionCorrectionFactorWord3 = 3282329835997625403 + +const invIterationsN = 18 + +// Inverse z = x⁻¹ mod q +// Implements "Optimized Binary GCD for Modular Inversion" +// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf +func (z *Element) Inverse(x *Element) *Element { + if x.IsZero() { + z.SetZero() + return z } - // r = 0 - r := Element{} + a := *x + b := Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + } // b := q - v := *x + u := Element{1} - var carry, borrow, t, t2 uint64 - var bigger bool + // Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v] + // c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1) + var c0, c1 int64 - for { - for v[0]&1 == 0 { - - // v = v >> 1 - t2 = v[3] << 63 - v[3] >>= 1 - t = t2 - t2 = v[2] << 63 - v[2] = (v[2] >> 1) | t - t = t2 - t2 = v[1] << 63 - v[1] = (v[1] >> 1) | t - t = t2 - v[0] = (v[0] >> 1) | t - - if s[0]&1 == 1 { - - // s = s + q - s[0], carry = bits.Add64(s[0], 4891460686036598785, 0) - s[1], carry = bits.Add64(s[1], 2896914383306846353, carry) - s[2], carry = bits.Add64(s[2], 13281191951274694749, carry) - s[3], _ = bits.Add64(s[3], 3486998266802970665, carry) + // Saved update factors to reduce the number of field multiplications + var pf0, pf1, pg0, pg1 int64 + + var i uint + + var v, s Element + // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations + // This also lets us get away with half 
as many updates to u,v + // To make this constant-time-ish, replace the condition with i < invIterationsN + for i = 0; i&1 == 1 || !a.IsZero(); i++ { + n := max(a.BitLen(), b.BitLen()) + aApprox, bApprox := approximate(&a, n), approximate(&b, n) + + // After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰ + // f0, g0, f1, g1 = 1, 0, 0, 1 + c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 + + for j := 0; j < approxLowBitsN; j++ { + + if aApprox&1 == 0 { + aApprox /= 2 + } else { + s, borrow := bits.Sub64(aApprox, bApprox, 0) + if borrow == 1 { + s = bApprox - aApprox + bApprox = aApprox + c0, c1 = c1, c0 + } + + aApprox = s / 2 + c0 = c0 - c1 + + // Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹ + // |f₁| ≤ 2ʲ still } - // s = s >> 1 - t2 = s[3] << 63 - s[3] >>= 1 - t = t2 - t2 = s[2] << 63 - s[2] = (s[2] >> 1) | t - t = t2 - t2 = s[1] << 63 - s[1] = (s[1] >> 1) | t - t = t2 - s[0] = (s[0] >> 1) | t + c1 *= 2 + // |f₁| ≤ 2ʲ⁺¹ + } + + s = a + var g0 int64 + // from this point on c0 aliases for f0 + c0, g0 = updateFactorsDecompose(c0) + aHi := a.linearCombNonModular(&s, c0, &b, g0) + if aHi&signBitSelector != 0 { + // if aHi < 0 + c0, g0 = -c0, -g0 + aHi = a.neg(&a, aHi) } - for u[0]&1 == 0 { - - // u = u >> 1 - t2 = u[3] << 63 - u[3] >>= 1 - t = t2 - t2 = u[2] << 63 - u[2] = (u[2] >> 1) | t - t = t2 - t2 = u[1] << 63 - u[1] = (u[1] >> 1) | t - t = t2 - u[0] = (u[0] >> 1) | t - - if r[0]&1 == 1 { - - // r = r + q - r[0], carry = bits.Add64(r[0], 4891460686036598785, 0) - r[1], carry = bits.Add64(r[1], 2896914383306846353, carry) - r[2], carry = bits.Add64(r[2], 13281191951274694749, carry) - r[3], _ = bits.Add64(r[3], 3486998266802970665, carry) + // right-shift a by k-1 bits + a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) + a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) + a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) + a[3] = (a[3] >> approxLowBitsN) | (aHi << approxHighBitsN) + + var f1 int64 + // from this point on c1 
aliases for g0 + f1, c1 = updateFactorsDecompose(c1) + bHi := b.linearCombNonModular(&s, f1, &b, c1) + if bHi&signBitSelector != 0 { + // if bHi < 0 + f1, c1 = -f1, -c1 + bHi = b.neg(&b, bHi) + } + // right-shift b by k-1 bits + b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN) + b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) + b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) + b[3] = (b[3] >> approxLowBitsN) | (bHi << approxHighBitsN) + + if i&1 == 1 { + // Combine current update factors with previously stored ones + // [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₀] [pf₀, pg₀; pf₀, pg₀] + // We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1} + // Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹ + // Which leaves us with an extra bit for the sign + + // c0 aliases f0, c1 aliases g1 + c0, g0, f1, c1 = c0*pf0+g0*pf1, + c0*pg0+g0*pg1, + f1*pf0+c1*pf1, + f1*pg0+c1*pg1 + + s = u + u.linearCombSosSigned(&u, c0, &v, g0) + v.linearCombSosSigned(&s, f1, &v, c1) - } + } else { + // Save update factors + pf0, pg0, pf1, pg1 = c0, g0, f1, c1 + } + } + + // For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻² + const pSq int64 = 1 << (2 * (k - 1)) + // If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly) + for ; i < invIterationsN; i += 2 { + v.mulWSigned(&v, pSq) + } + + z.Mul(&v, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + }) + return z +} + +// approximate a big number x into a single 64 bit word using its uppermost and lowermost bits +// if x fits in a word as is, no approximation necessary +func approximate(x *Element, nBits int) uint64 { + + if nBits <= 64 { + return x[0] + } + + const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones + lo := mask & x[0] + + hiWordIndex := (nBits - 1) / 64 + + hiWordBitsAvailable := nBits - 
hiWordIndex*64 + hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) + + mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) + hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) + + mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) + mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed + + return lo | mid | hi +} + +func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) { + hi := z.linearCombNonModular(x, xC, y, yC) + z.montReduceSigned(z, hi) +} + +// montReduceSigned SOS algorithm; xHi must be at most 63 bits long. Last bit of xHi may be used as a sign bit +func (z *Element) montReduceSigned(x *Element, xHi uint64) { + + const signBitRemover = ^signBitSelector + neg := xHi&signBitSelector != 0 + // the SOS implementation requires that most significant bit is 0 + // Let X be xHi*r + x + // note that if X is negative we would have initially stored it as 2⁶⁴ r + X + xHi &= signBitRemover + // with this a negative X is now represented as 2⁶³ r + X + + var t [2*Limbs - 1]uint64 + var C uint64 - // r = r >> 1 - t2 = r[3] << 63 - r[3] >>= 1 - t = t2 - t2 = r[2] << 63 - r[2] = (r[2] >> 1) | t - t = t2 - t2 = r[1] << 63 - r[1] = (r[1] >> 1) | t - t = t2 - r[0] = (r[0] >> 1) | t + m := x[0] * qInvNegLsw + C = madd0(m, qElementWord0, x[0]) + C, t[1] = madd2(m, qElementWord1, x[1], C) + C, t[2] = madd2(m, qElementWord2, x[2], C) + C, t[3] = madd2(m, qElementWord3, x[3], C) + + // the high word of m * qElement[3] is at most 62 bits + // x[3] + C is at most 65 bits (high word at most 1 bit) + // Thus the resulting C will be at most 63 bits + t[4] = xHi + C + // xHi and C are 63 bits, therefore no overflow + + { + const i = 1 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + + t[i+Limbs] += C + } + { + const i = 2 + m = t[i] * qInvNegLsw + + C = madd0(m, 
qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + + t[i+Limbs] += C + } + { + const i = 3 + m := t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, z[0] = madd2(m, qElementWord1, t[i+1], C) + C, z[1] = madd2(m, qElementWord2, t[i+2], C) + z[3], z[2] = madd2(m, qElementWord3, t[i+3], C) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) + z[1], b = bits.Sub64(z[1], 2896914383306846353, b) + z[2], b = bits.Sub64(z[2], 13281191951274694749, b) + z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) + } + if neg { + // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead + var b uint64 + z[0], b = bits.Sub64(z[0], signBitSelector, 0) + z[1], b = bits.Sub64(z[1], 0, b) + z[2], b = bits.Sub64(z[2], 0, b) + z[3], b = bits.Sub64(z[3], 0, b) + + // Occurs iff x == 0 && xHi < 0, i.e. 
X = rX' for -2⁶³ ≤ X' < 0 + if b != 0 { + // z[3] = -1 + // negative: add q + const neg1 = 0xFFFFFFFFFFFFFFFF + + b = 0 + z[0], b = bits.Add64(z[0], qElementWord0, b) + z[1], b = bits.Add64(z[1], qElementWord1, b) + z[2], b = bits.Add64(z[2], qElementWord2, b) + z[3], _ = bits.Add64(neg1, qElementWord3, b) } + } +} - // v >= u - bigger = !(v[3] < u[3] || (v[3] == u[3] && (v[2] < u[2] || (v[2] == u[2] && (v[1] < u[1] || (v[1] == u[1] && (v[0] < u[0]))))))) +// mulWSigned mul word signed (w/ montgomery reduction) +func (z *Element) mulWSigned(x *Element, y int64) { + m := y >> 63 + _mulWGeneric(z, x, uint64((y^m)-m)) + // multiply by abs(y) + if y < 0 { + z.Neg(z) + } +} - if bigger { +func (z *Element) neg(x *Element, xHi uint64) uint64 { + var b uint64 - // v = v - u - v[0], borrow = bits.Sub64(v[0], u[0], 0) - v[1], borrow = bits.Sub64(v[1], u[1], borrow) - v[2], borrow = bits.Sub64(v[2], u[2], borrow) - v[3], _ = bits.Sub64(v[3], u[3], borrow) + z[0], b = bits.Sub64(0, x[0], 0) + z[1], b = bits.Sub64(0, x[1], b) + z[2], b = bits.Sub64(0, x[2], b) + z[3], b = bits.Sub64(0, x[3], b) + xHi, _ = bits.Sub64(0, xHi, b) - // s = s - r - s[0], borrow = bits.Sub64(s[0], r[0], 0) - s[1], borrow = bits.Sub64(s[1], r[1], borrow) - s[2], borrow = bits.Sub64(s[2], r[2], borrow) - s[3], borrow = bits.Sub64(s[3], r[3], borrow) + return xHi +} - if borrow == 1 { +// regular multiplication by one word regular (non montgomery) +// Fewer additions than the branch-free for positive y. 
Could be faster on some architectures +func (z *Element) mulWRegular(x *Element, y int64) uint64 { - // s = s + q - s[0], carry = bits.Add64(s[0], 4891460686036598785, 0) - s[1], carry = bits.Add64(s[1], 2896914383306846353, carry) - s[2], carry = bits.Add64(s[2], 13281191951274694749, carry) - s[3], _ = bits.Add64(s[3], 3486998266802970665, carry) + // w := abs(y) + m := y >> 63 + w := uint64((y ^ m) - m) - } - } else { + var c uint64 + c, z[0] = bits.Mul64(x[0], w) + c, z[1] = madd1(x[1], w, c) + c, z[2] = madd1(x[2], w, c) + c, z[3] = madd1(x[3], w, c) - // u = u - v - u[0], borrow = bits.Sub64(u[0], v[0], 0) - u[1], borrow = bits.Sub64(u[1], v[1], borrow) - u[2], borrow = bits.Sub64(u[2], v[2], borrow) - u[3], _ = bits.Sub64(u[3], v[3], borrow) + if y < 0 { + c = z.neg(z, c) + } - // r = r - s - r[0], borrow = bits.Sub64(r[0], s[0], 0) - r[1], borrow = bits.Sub64(r[1], s[1], borrow) - r[2], borrow = bits.Sub64(r[2], s[2], borrow) - r[3], borrow = bits.Sub64(r[3], s[3], borrow) + return c +} - if borrow == 1 { +/* +Removed: seems slower +// mulWRegular branch-free regular multiplication by one word (non montgomery) +func (z *Element) mulWRegularBf(x *Element, y int64) uint64 { - // r = r + q - r[0], carry = bits.Add64(r[0], 4891460686036598785, 0) - r[1], carry = bits.Add64(r[1], 2896914383306846353, carry) - r[2], carry = bits.Add64(r[2], 13281191951274694749, carry) - r[3], _ = bits.Add64(r[3], 3486998266802970665, carry) + w := uint64(y) + allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w - } + // s[0], s[1] so results are not stored immediately in z. + // x[i] will be needed in the i+1 th iteration. 
We don't want to overwrite it in case x = z + var s [2]uint64 + var h [2]uint64 + + h[0], s[0] = bits.Mul64(x[0], w) + + c := uint64(0) + b := uint64(0) + + { + const curI = 1 % 2 + const prevI = 1 - curI + const iMinusOne = 1 - 1 + + h[curI], s[curI] = bits.Mul64(x[1], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] } - if (u[0] == 1) && (u[3]|u[2]|u[1]) == 0 { - return z.Set(&r) + + { + const curI = 2 % 2 + const prevI = 1 - curI + const iMinusOne = 2 - 1 + + h[curI], s[curI] = bits.Mul64(x[2], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] } - if (v[0] == 1) && (v[3]|v[2]|v[1]) == 0 { - return z.Set(&s) + + { + const curI = 3 % 2 + const prevI = 1 - curI + const iMinusOne = 3 - 1 + + h[curI], s[curI] = bits.Mul64(x[3], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] } + { + const curI = 4 % 2 + const prevI = 1 - curI + const iMinusOne = 3 + + s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + + return s[curI] + c } +}*/ + +// Requires NoCarry +func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { + var yTimes Element + + yHi := yTimes.mulWRegular(y, yC) + xHi := z.mulWRegular(x, xC) + + carry := uint64(0) + z[0], carry = bits.Add64(z[0], yTimes[0], carry) + z[1], carry = bits.Add64(z[1], yTimes[1], carry) + z[2], carry = bits.Add64(z[2], yTimes[2], carry) + z[3], carry = bits.Add64(z[3], yTimes[3], carry) + + yHi, _ = bits.Add64(xHi, yHi, carry) + return yHi } diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_exp.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_exp.go new file mode 100644 index 00000000000..e8c4da79289 --- /dev/null +++ 
b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_exp.go @@ -0,0 +1,819 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// expBySqrtExp is equivalent to z.Exp(x, 183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f0fac9f) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _111 = _10 + _101 + // _1001 = _10 + _111 + // _1011 = _10 + _1001 + // _1101 = _10 + _1011 + // _1111 = _10 + _1101 + // _11000 = _1001 + _1111 + // _11111 = _111 + _11000 + // i26 = ((_11000 << 4 + _11) << 3 + 1) << 7 + // i36 = ((_1001 + i26) << 2 + _11) << 5 + _111 + // i53 = (2*(i36 << 6 + _1011) + 1) << 8 + // i64 = (2*(_1001 + i53) + 1) << 7 + _1101 + // i84 = ((i64 << 10 + _101) << 6 + _1101) << 2 + // i100 = ((_11 + i84) << 7 + _101) << 6 + 1 + // i117 = ((i100 << 7 + _1011) << 5 + _1101) << 3 + // i137 = ((_101 + i117) << 8 + _11) << 9 + _101 + // i153 = ((i137 << 3 + _11) << 8 + _1011) << 3 + // i168 = ((_101 + i153) << 5 + _101) << 7 + _11 + // i187 = ((i168 << 7 + _11111) << 2 + 1) << 8 + // i204 = ((_1001 + i187) << 8 + _1111) << 6 + _1101 + // i215 = 2*((i204 << 2 + _11) << 6 + _1011) + // i232 = ((1 + i215) << 8 + _1001) << 6 + _101 + // i257 = ((i232 << 9 + _11111) 
<< 9 + _11111) << 5 + // return ((_1011 + i257) << 3 + 1) << 7 + _11111 + // + // Operations: 221 squares 49 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7 Element + // Step 1: z = x^0x2 + z.Square(&x) + + // Step 2: t3 = x^0x3 + t3.Mul(&x, z) + + // Step 3: t1 = x^0x5 + t1.Mul(z, t3) + + // Step 4: t6 = x^0x7 + t6.Mul(z, t1) + + // Step 5: t2 = x^0x9 + t2.Mul(z, t6) + + // Step 6: t0 = x^0xb + t0.Mul(z, t2) + + // Step 7: t4 = x^0xd + t4.Mul(z, t0) + + // Step 8: t5 = x^0xf + t5.Mul(z, t4) + + // Step 9: t7 = x^0x18 + t7.Mul(t2, t5) + + // Step 10: z = x^0x1f + z.Mul(t6, t7) + + // Step 14: t7 = x^0x180 + for s := 0; s < 4; s++ { + t7.Square(t7) + } + + // Step 15: t7 = x^0x183 + t7.Mul(t3, t7) + + // Step 18: t7 = x^0xc18 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 19: t7 = x^0xc19 + t7.Mul(&x, t7) + + // Step 26: t7 = x^0x60c80 + for s := 0; s < 7; s++ { + t7.Square(t7) + } + + // Step 27: t7 = x^0x60c89 + t7.Mul(t2, t7) + + // Step 29: t7 = x^0x183224 + for s := 0; s < 2; s++ { + t7.Square(t7) + } + + // Step 30: t7 = x^0x183227 + t7.Mul(t3, t7) + + // Step 35: t7 = x^0x30644e0 + for s := 0; s < 5; s++ { + t7.Square(t7) + } + + // Step 36: t6 = x^0x30644e7 + t6.Mul(t6, t7) + + // Step 42: t6 = x^0xc19139c0 + for s := 0; s < 6; s++ { + t6.Square(t6) + } + + // Step 43: t6 = x^0xc19139cb + t6.Mul(t0, t6) + + // Step 44: t6 = x^0x183227396 + t6.Square(t6) + + // Step 45: t6 = x^0x183227397 + t6.Mul(&x, t6) + + // Step 53: t6 = x^0x18322739700 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 54: t6 = x^0x18322739709 + t6.Mul(t2, t6) + + // Step 55: t6 = x^0x30644e72e12 + t6.Square(t6) + + // Step 56: t6 = x^0x30644e72e13 + t6.Mul(&x, t6) + + // Step 63: t6 = x^0x1832273970980 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 64: t6 = 
x^0x183227397098d + t6.Mul(t4, t6) + + // Step 74: t6 = x^0x60c89ce5c263400 + for s := 0; s < 10; s++ { + t6.Square(t6) + } + + // Step 75: t6 = x^0x60c89ce5c263405 + t6.Mul(t1, t6) + + // Step 81: t6 = x^0x183227397098d0140 + for s := 0; s < 6; s++ { + t6.Square(t6) + } + + // Step 82: t6 = x^0x183227397098d014d + t6.Mul(t4, t6) + + // Step 84: t6 = x^0x60c89ce5c26340534 + for s := 0; s < 2; s++ { + t6.Square(t6) + } + + // Step 85: t6 = x^0x60c89ce5c26340537 + t6.Mul(t3, t6) + + // Step 92: t6 = x^0x30644e72e131a029b80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 93: t6 = x^0x30644e72e131a029b85 + t6.Mul(t1, t6) + + // Step 99: t6 = x^0xc19139cb84c680a6e140 + for s := 0; s < 6; s++ { + t6.Square(t6) + } + + // Step 100: t6 = x^0xc19139cb84c680a6e141 + t6.Mul(&x, t6) + + // Step 107: t6 = x^0x60c89ce5c263405370a080 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 108: t6 = x^0x60c89ce5c263405370a08b + t6.Mul(t0, t6) + + // Step 113: t6 = x^0xc19139cb84c680a6e141160 + for s := 0; s < 5; s++ { + t6.Square(t6) + } + + // Step 114: t6 = x^0xc19139cb84c680a6e14116d + t6.Mul(t4, t6) + + // Step 117: t6 = x^0x60c89ce5c263405370a08b68 + for s := 0; s < 3; s++ { + t6.Square(t6) + } + + // Step 118: t6 = x^0x60c89ce5c263405370a08b6d + t6.Mul(t1, t6) + + // Step 126: t6 = x^0x60c89ce5c263405370a08b6d00 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 127: t6 = x^0x60c89ce5c263405370a08b6d03 + t6.Mul(t3, t6) + + // Step 136: t6 = x^0xc19139cb84c680a6e14116da0600 + for s := 0; s < 9; s++ { + t6.Square(t6) + } + + // Step 137: t6 = x^0xc19139cb84c680a6e14116da0605 + t6.Mul(t1, t6) + + // Step 140: t6 = x^0x60c89ce5c263405370a08b6d03028 + for s := 0; s < 3; s++ { + t6.Square(t6) + } + + // Step 141: t6 = x^0x60c89ce5c263405370a08b6d0302b + t6.Mul(t3, t6) + + // Step 149: t6 = x^0x60c89ce5c263405370a08b6d0302b00 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 150: t6 = x^0x60c89ce5c263405370a08b6d0302b0b + t6.Mul(t0, t6) + + // Step 
153: t6 = x^0x30644e72e131a029b85045b681815858 + for s := 0; s < 3; s++ { + t6.Square(t6) + } + + // Step 154: t6 = x^0x30644e72e131a029b85045b68181585d + t6.Mul(t1, t6) + + // Step 159: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba0 + for s := 0; s < 5; s++ { + t6.Square(t6) + } + + // Step 160: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5 + t6.Mul(t1, t6) + + // Step 167: t6 = x^0x30644e72e131a029b85045b68181585d280 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 168: t6 = x^0x30644e72e131a029b85045b68181585d283 + t6.Mul(t3, t6) + + // Step 175: t6 = x^0x183227397098d014dc2822db40c0ac2e94180 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 176: t6 = x^0x183227397098d014dc2822db40c0ac2e9419f + t6.Mul(z, t6) + + // Step 178: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067c + for s := 0; s < 2; s++ { + t6.Square(t6) + } + + // Step 179: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d + t6.Mul(&x, t6) + + // Step 187: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d00 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 188: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d09 + t6.Mul(t2, t6) + + // Step 196: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d0900 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 197: t5 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f + t5.Mul(t5, t6) + + // Step 203: t5 = x^0x183227397098d014dc2822db40c0ac2e9419f4243c0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 204: t4 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cd + t4.Mul(t4, t5) + + // Step 206: t4 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f34 + for s := 0; s < 2; s++ { + t4.Square(t4) + } + + // Step 207: t3 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f37 + t3.Mul(t3, t4) + + // Step 213: t3 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdc0 + for s := 0; s < 6; s++ { + t3.Square(t3) + } + + // Step 214: t3 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb + t3.Mul(t0, t3) + + // Step 215: t3 = 
x^0x30644e72e131a029b85045b68181585d2833e84879b96 + t3.Square(t3) + + // Step 216: t3 = x^0x30644e72e131a029b85045b68181585d2833e84879b97 + t3.Mul(&x, t3) + + // Step 224: t3 = x^0x30644e72e131a029b85045b68181585d2833e84879b9700 + for s := 0; s < 8; s++ { + t3.Square(t3) + } + + // Step 225: t2 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709 + t2.Mul(t2, t3) + + // Step 231: t2 = x^0xc19139cb84c680a6e14116da06056174a0cfa121e6e5c240 + for s := 0; s < 6; s++ { + t2.Square(t2) + } + + // Step 232: t1 = x^0xc19139cb84c680a6e14116da06056174a0cfa121e6e5c245 + t1.Mul(t1, t2) + + // Step 241: t1 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb848a00 + for s := 0; s < 9; s++ { + t1.Square(t1) + } + + // Step 242: t1 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f + t1.Mul(z, t1) + + // Step 251: t1 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e00 + for s := 0; s < 9; s++ { + t1.Square(t1) + } + + // Step 252: t1 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f + t1.Mul(z, t1) + + // Step 257: t1 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f372e12287c3e0 + for s := 0; s < 5; s++ { + t1.Square(t1) + } + + // Step 258: t0 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f372e12287c3eb + t0.Mul(t0, t1) + + // Step 261: t0 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f58 + for s := 0; s < 3; s++ { + t0.Square(t0) + } + + // Step 262: t0 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f59 + t0.Mul(&x, t0) + + // Step 269: t0 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f0fac80 + for s := 0; s < 7; s++ { + t0.Square(t0) + } + + // Step 270: z = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f0fac9f + z.Mul(z, t0) + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f0fac9f8000000) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition 
chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _111 = _10 + _101 + // _1001 = _10 + _111 + // _1011 = _10 + _1001 + // _1101 = _10 + _1011 + // _1111 = _10 + _1101 + // _11000 = _1001 + _1111 + // _11111 = _111 + _11000 + // i26 = ((_11000 << 4 + _11) << 3 + 1) << 7 + // i36 = ((_1001 + i26) << 2 + _11) << 5 + _111 + // i53 = (2*(i36 << 6 + _1011) + 1) << 8 + // i64 = (2*(_1001 + i53) + 1) << 7 + _1101 + // i84 = ((i64 << 10 + _101) << 6 + _1101) << 2 + // i100 = ((_11 + i84) << 7 + _101) << 6 + 1 + // i117 = ((i100 << 7 + _1011) << 5 + _1101) << 3 + // i137 = ((_101 + i117) << 8 + _11) << 9 + _101 + // i153 = ((i137 << 3 + _11) << 8 + _1011) << 3 + // i168 = ((_101 + i153) << 5 + _101) << 7 + _11 + // i187 = ((i168 << 7 + _11111) << 2 + 1) << 8 + // i204 = ((_1001 + i187) << 8 + _1111) << 6 + _1101 + // i215 = 2*((i204 << 2 + _11) << 6 + _1011) + // i232 = ((1 + i215) << 8 + _1001) << 6 + _101 + // i257 = ((i232 << 9 + _11111) << 9 + _11111) << 5 + // i270 = ((_1011 + i257) << 3 + 1) << 7 + _11111 + // return (2*i270 + 1) << 27 + // + // Operations: 249 squares 50 multiplies + + // Allocate Temporaries. 
+ var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7 Element + // Step 1: z = x^0x2 + z.Square(&x) + + // Step 2: t3 = x^0x3 + t3.Mul(&x, z) + + // Step 3: t1 = x^0x5 + t1.Mul(z, t3) + + // Step 4: t6 = x^0x7 + t6.Mul(z, t1) + + // Step 5: t2 = x^0x9 + t2.Mul(z, t6) + + // Step 6: t0 = x^0xb + t0.Mul(z, t2) + + // Step 7: t4 = x^0xd + t4.Mul(z, t0) + + // Step 8: t5 = x^0xf + t5.Mul(z, t4) + + // Step 9: t7 = x^0x18 + t7.Mul(t2, t5) + + // Step 10: z = x^0x1f + z.Mul(t6, t7) + + // Step 14: t7 = x^0x180 + for s := 0; s < 4; s++ { + t7.Square(t7) + } + + // Step 15: t7 = x^0x183 + t7.Mul(t3, t7) + + // Step 18: t7 = x^0xc18 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 19: t7 = x^0xc19 + t7.Mul(&x, t7) + + // Step 26: t7 = x^0x60c80 + for s := 0; s < 7; s++ { + t7.Square(t7) + } + + // Step 27: t7 = x^0x60c89 + t7.Mul(t2, t7) + + // Step 29: t7 = x^0x183224 + for s := 0; s < 2; s++ { + t7.Square(t7) + } + + // Step 30: t7 = x^0x183227 + t7.Mul(t3, t7) + + // Step 35: t7 = x^0x30644e0 + for s := 0; s < 5; s++ { + t7.Square(t7) + } + + // Step 36: t6 = x^0x30644e7 + t6.Mul(t6, t7) + + // Step 42: t6 = x^0xc19139c0 + for s := 0; s < 6; s++ { + t6.Square(t6) + } + + // Step 43: t6 = x^0xc19139cb + t6.Mul(t0, t6) + + // Step 44: t6 = x^0x183227396 + t6.Square(t6) + + // Step 45: t6 = x^0x183227397 + t6.Mul(&x, t6) + + // Step 53: t6 = x^0x18322739700 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 54: t6 = x^0x18322739709 + t6.Mul(t2, t6) + + // Step 55: t6 = x^0x30644e72e12 + t6.Square(t6) + + // Step 56: t6 = x^0x30644e72e13 + t6.Mul(&x, t6) + + // Step 63: t6 = x^0x1832273970980 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 64: t6 = x^0x183227397098d + t6.Mul(t4, t6) + + // Step 74: t6 = x^0x60c89ce5c263400 + for s := 0; s < 10; s++ { + t6.Square(t6) + } + + // Step 75: t6 = 
x^0x60c89ce5c263405 + t6.Mul(t1, t6) + + // Step 81: t6 = x^0x183227397098d0140 + for s := 0; s < 6; s++ { + t6.Square(t6) + } + + // Step 82: t6 = x^0x183227397098d014d + t6.Mul(t4, t6) + + // Step 84: t6 = x^0x60c89ce5c26340534 + for s := 0; s < 2; s++ { + t6.Square(t6) + } + + // Step 85: t6 = x^0x60c89ce5c26340537 + t6.Mul(t3, t6) + + // Step 92: t6 = x^0x30644e72e131a029b80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 93: t6 = x^0x30644e72e131a029b85 + t6.Mul(t1, t6) + + // Step 99: t6 = x^0xc19139cb84c680a6e140 + for s := 0; s < 6; s++ { + t6.Square(t6) + } + + // Step 100: t6 = x^0xc19139cb84c680a6e141 + t6.Mul(&x, t6) + + // Step 107: t6 = x^0x60c89ce5c263405370a080 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 108: t6 = x^0x60c89ce5c263405370a08b + t6.Mul(t0, t6) + + // Step 113: t6 = x^0xc19139cb84c680a6e141160 + for s := 0; s < 5; s++ { + t6.Square(t6) + } + + // Step 114: t6 = x^0xc19139cb84c680a6e14116d + t6.Mul(t4, t6) + + // Step 117: t6 = x^0x60c89ce5c263405370a08b68 + for s := 0; s < 3; s++ { + t6.Square(t6) + } + + // Step 118: t6 = x^0x60c89ce5c263405370a08b6d + t6.Mul(t1, t6) + + // Step 126: t6 = x^0x60c89ce5c263405370a08b6d00 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 127: t6 = x^0x60c89ce5c263405370a08b6d03 + t6.Mul(t3, t6) + + // Step 136: t6 = x^0xc19139cb84c680a6e14116da0600 + for s := 0; s < 9; s++ { + t6.Square(t6) + } + + // Step 137: t6 = x^0xc19139cb84c680a6e14116da0605 + t6.Mul(t1, t6) + + // Step 140: t6 = x^0x60c89ce5c263405370a08b6d03028 + for s := 0; s < 3; s++ { + t6.Square(t6) + } + + // Step 141: t6 = x^0x60c89ce5c263405370a08b6d0302b + t6.Mul(t3, t6) + + // Step 149: t6 = x^0x60c89ce5c263405370a08b6d0302b00 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 150: t6 = x^0x60c89ce5c263405370a08b6d0302b0b + t6.Mul(t0, t6) + + // Step 153: t6 = x^0x30644e72e131a029b85045b681815858 + for s := 0; s < 3; s++ { + t6.Square(t6) + } + + // Step 154: t6 = 
x^0x30644e72e131a029b85045b68181585d + t6.Mul(t1, t6) + + // Step 159: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba0 + for s := 0; s < 5; s++ { + t6.Square(t6) + } + + // Step 160: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5 + t6.Mul(t1, t6) + + // Step 167: t6 = x^0x30644e72e131a029b85045b68181585d280 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 168: t6 = x^0x30644e72e131a029b85045b68181585d283 + t6.Mul(t3, t6) + + // Step 175: t6 = x^0x183227397098d014dc2822db40c0ac2e94180 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 176: t6 = x^0x183227397098d014dc2822db40c0ac2e9419f + t6.Mul(z, t6) + + // Step 178: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067c + for s := 0; s < 2; s++ { + t6.Square(t6) + } + + // Step 179: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d + t6.Mul(&x, t6) + + // Step 187: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d00 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 188: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d09 + t6.Mul(t2, t6) + + // Step 196: t6 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d0900 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 197: t5 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f + t5.Mul(t5, t6) + + // Step 203: t5 = x^0x183227397098d014dc2822db40c0ac2e9419f4243c0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 204: t4 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cd + t4.Mul(t4, t5) + + // Step 206: t4 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f34 + for s := 0; s < 2; s++ { + t4.Square(t4) + } + + // Step 207: t3 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f37 + t3.Mul(t3, t4) + + // Step 213: t3 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdc0 + for s := 0; s < 6; s++ { + t3.Square(t3) + } + + // Step 214: t3 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb + t3.Mul(t0, t3) + + // Step 215: t3 = x^0x30644e72e131a029b85045b68181585d2833e84879b96 + t3.Square(t3) + + // Step 216: t3 = x^0x30644e72e131a029b85045b68181585d2833e84879b97 + t3.Mul(&x, t3) + 
+ // Step 224: t3 = x^0x30644e72e131a029b85045b68181585d2833e84879b9700 + for s := 0; s < 8; s++ { + t3.Square(t3) + } + + // Step 225: t2 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709 + t2.Mul(t2, t3) + + // Step 231: t2 = x^0xc19139cb84c680a6e14116da06056174a0cfa121e6e5c240 + for s := 0; s < 6; s++ { + t2.Square(t2) + } + + // Step 232: t1 = x^0xc19139cb84c680a6e14116da06056174a0cfa121e6e5c245 + t1.Mul(t1, t2) + + // Step 241: t1 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb848a00 + for s := 0; s < 9; s++ { + t1.Square(t1) + } + + // Step 242: t1 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f + t1.Mul(z, t1) + + // Step 251: t1 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e00 + for s := 0; s < 9; s++ { + t1.Square(t1) + } + + // Step 252: t1 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f + t1.Mul(z, t1) + + // Step 257: t1 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f372e12287c3e0 + for s := 0; s < 5; s++ { + t1.Square(t1) + } + + // Step 258: t0 = x^0x60c89ce5c263405370a08b6d0302b0ba5067d090f372e12287c3eb + t0.Mul(t0, t1) + + // Step 261: t0 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f58 + for s := 0; s < 3; s++ { + t0.Square(t0) + } + + // Step 262: t0 = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f59 + t0.Mul(&x, t0) + + // Step 269: t0 = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f0fac80 + for s := 0; s < 7; s++ { + t0.Square(t0) + } + + // Step 270: z = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f0fac9f + z.Mul(z, t0) + + // Step 271: z = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593e + z.Square(z) + + // Step 272: z = x^0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f + z.Mul(&x, z) + + // Step 299: z = x^0x183227397098d014dc2822db40c0ac2e9419f4243cdcb848a1f0fac9f8000000 + for s := 0; s < 27; s++ { + z.Square(z) + } + + return z +} diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_fuzz.go 
b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_fuzz.go new file mode 100644 index 00000000000..a4c87eb250f --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_fuzz.go @@ -0,0 +1,136 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "bytes" + "encoding/binary" + "io" + "math/big" + "math/bits" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +// Fuzz arithmetic operations fuzzer +func Fuzz(data []byte) int { + r := bytes.NewReader(data) + + var e1, e2 Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + // mul assembly + + var c, _c Element + a, _a, b, _b := e1, e1, e2, e2 + c.Mul(&a, &b) + _mulGeneric(&_c, &_a, &_b) + + if !c.Equal(&_c) { + panic("mul asm != mul generic on Element") + } + } + + { + // inverse + inv := e1 + inv.Inverse(&inv) + + var bInv, b1, b2 big.Int + e1.ToBigIntRegular(&b1) + bInv.ModInverse(&b1, Modulus()) + inv.ToBigIntRegular(&b2) + + if b2.Cmp(&bInv) != 0 { + panic("inverse operation doesn't match big int result") + } + } + + { + // a + -a == 0 + a, b := e1, e1 + b.Neg(&b) + a.Add(&a, &b) + if !a.IsZero() { + panic("a + -a != 0") + } + } + + return fuzzNormal + +} + +// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader +// and interpret it as big endian uint64 
+// used for fuzzing purposes only +func (z *Element) SetRawBytes(r io.Reader) { + + buf := make([]byte, 8) + + for i := 0; i < len(z); i++ { + if _, err := io.ReadFull(r, buf); err != nil { + goto eof + } + z[i] = binary.BigEndian.Uint64(buf[:]) + } +eof: + z[3] %= qElement[3] + + if z.BiggerModulus() { + var b uint64 + z[0], b = bits.Sub64(z[0], qElement[0], 0) + z[1], b = bits.Sub64(z[1], qElement[1], b) + z[2], b = bits.Sub64(z[2], qElement[2], b) + z[3], b = bits.Sub64(z[3], qElement[3], b) + } + + return +} + +func (z *Element) BiggerModulus() bool { + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_mul_adx_amd64.s b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_mul_adx_amd64.s index 494e7bfd7e2..bc0b747d255 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_mul_adx_amd64.s +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_mul_adx_amd64.s @@ -46,7 +46,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), NOSPLIT, $0-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -72,7 +72,7 @@ TEXT ·mul(SB), NOSPLIT, $0-24 // A -> BP // t[0] -> R14 - // t[1] -> R15 + // t[1] -> R13 // t[2] -> CX // t[3] -> BX // clear the flags @@ -80,11 +80,11 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ 0(R11), DX // (A,t[0]) := x[0]*y[0] + A - MULXQ DI, R14, R15 + MULXQ DI, R14, R13 // (A,t[1]) := x[1]*y[0] + A MULXQ R8, AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := x[2]*y[0] + A MULXQ R9, AX, BX 
@@ -111,14 +111,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -139,9 +139,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[1] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[1] + A ADCXQ BP, CX @@ -171,14 +171,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -199,9 +199,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[2] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[2] + A ADCXQ BP, CX @@ -231,14 +231,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -259,9 +259,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[3] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[3] + A ADCXQ BP, CX @@ -291,14 +291,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 
// (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -310,12 +310,12 @@ TEXT ·mul(SB), NOSPLIT, $0-24 ADCXQ AX, BX ADOXQ BP, BX - // reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11) - REDUCE(R14,R15,CX,BX,R13,SI,R12,R11) + // reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI) + REDUCE(R14,R13,CX,BX,SI,R12,R11,DI) MOVQ res+0(FP), AX MOVQ R14, 0(AX) - MOVQ R15, 8(AX) + MOVQ R13, 8(AX) MOVQ CX, 16(AX) MOVQ BX, 24(AX) RET @@ -323,7 +323,7 @@ TEXT ·mul(SB), NOSPLIT, $0-24 TEXT ·fromMont(SB), NOSPLIT, $0-8 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] @@ -335,7 +335,7 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 // t[N-1] = C MOVQ res+0(FP), DX MOVQ 0(DX), R14 - MOVQ 8(DX), R15 + MOVQ 8(DX), R13 MOVQ 16(DX), CX MOVQ 24(DX), BX XORQ DX, DX @@ -351,14 +351,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -380,14 +380,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -409,14 +409,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) 
:= t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -438,14 +438,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -455,12 +455,12 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8 ADCXQ AX, BX ADOXQ AX, BX - // reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9) - REDUCE(R14,R15,CX,BX,SI,DI,R8,R9) + // reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9) + REDUCE(R14,R13,CX,BX,SI,DI,R8,R9) MOVQ res+0(FP), AX MOVQ R14, 0(AX) - MOVQ R15, 8(AX) + MOVQ R13, 8(AX) MOVQ CX, 16(AX) MOVQ BX, 24(AX) RET diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_mul_amd64.s b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_mul_amd64.s index 38b3b6cf629..764b9c43285 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_mul_amd64.s +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_mul_amd64.s @@ -46,7 +46,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), $24-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -75,7 +75,7 @@ TEXT ·mul(SB), $24-24 // A -> BP // t[0] -> R14 - // t[1] -> R15 + // t[1] -> R13 // t[2] -> CX // t[3] -> BX // clear the flags @@ -83,11 +83,11 @@ TEXT ·mul(SB), $24-24 MOVQ 0(R11), DX // (A,t[0]) := x[0]*y[0] + A - MULXQ DI, R14, R15 + MULXQ DI, R14, R13 // (A,t[1]) := x[1]*y[0] + A MULXQ R8, AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := x[2]*y[0] 
+ A MULXQ R9, AX, BX @@ -114,14 +114,14 @@ TEXT ·mul(SB), $24-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -142,9 +142,9 @@ TEXT ·mul(SB), $24-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[1] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[1] + A ADCXQ BP, CX @@ -174,14 +174,14 @@ TEXT ·mul(SB), $24-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -202,9 +202,9 @@ TEXT ·mul(SB), $24-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[2] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[2] + A ADCXQ BP, CX @@ -234,14 +234,14 @@ TEXT ·mul(SB), $24-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -262,9 +262,9 @@ TEXT ·mul(SB), $24-24 ADOXQ AX, R14 // (A,t[1]) := t[1] + x[1]*y[3] + A - ADCXQ BP, R15 + ADCXQ BP, R13 MULXQ R8, AX, BP - ADOXQ AX, R15 + ADOXQ AX, R13 // (A,t[2]) := t[2] + x[2]*y[3] + A ADCXQ BP, CX @@ -294,14 +294,14 @@ TEXT ·mul(SB), $24-24 MOVQ R12, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - 
ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -313,12 +313,12 @@ TEXT ·mul(SB), $24-24 ADCXQ AX, BX ADOXQ BP, BX - // reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11) - REDUCE(R14,R15,CX,BX,R13,SI,R12,R11) + // reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI) + REDUCE(R14,R13,CX,BX,SI,R12,R11,DI) MOVQ res+0(FP), AX MOVQ R14, 0(AX) - MOVQ R15, 8(AX) + MOVQ R13, 8(AX) MOVQ CX, 16(AX) MOVQ BX, 24(AX) RET @@ -337,7 +337,7 @@ TEXT ·fromMont(SB), $8-8 NO_LOCAL_POINTERS // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] @@ -351,7 +351,7 @@ TEXT ·fromMont(SB), $8-8 JNE l2 MOVQ res+0(FP), DX MOVQ 0(DX), R14 - MOVQ 8(DX), R15 + MOVQ 8(DX), R13 MOVQ 16(DX), CX MOVQ 24(DX), BX XORQ DX, DX @@ -367,14 +367,14 @@ TEXT ·fromMont(SB), $8-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -396,14 +396,14 @@ TEXT ·fromMont(SB), $8-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -425,14 +425,14 @@ TEXT ·fromMont(SB), $8-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, 
R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -454,14 +454,14 @@ TEXT ·fromMont(SB), $8-8 MOVQ BP, R14 // (C,t[0]) := t[1] + m*q[1] + C - ADCXQ R15, R14 - MULXQ q<>+8(SB), AX, R15 + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 ADOXQ AX, R14 // (C,t[1]) := t[2] + m*q[2] + C - ADCXQ CX, R15 + ADCXQ CX, R13 MULXQ q<>+16(SB), AX, CX - ADOXQ AX, R15 + ADOXQ AX, R13 // (C,t[2]) := t[3] + m*q[3] + C ADCXQ BX, CX @@ -471,12 +471,12 @@ TEXT ·fromMont(SB), $8-8 ADCXQ AX, BX ADOXQ AX, BX - // reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9) - REDUCE(R14,R15,CX,BX,SI,DI,R8,R9) + // reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9) + REDUCE(R14,R13,CX,BX,SI,DI,R8,R9) MOVQ res+0(FP), AX MOVQ R14, 0(AX) - MOVQ R15, 8(AX) + MOVQ R13, 8(AX) MOVQ CX, 16(AX) MOVQ BX, 24(AX) RET diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_amd64.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_amd64.go index f0d8316e528..78022b3e6f7 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_amd64.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_amd64.go @@ -22,6 +22,9 @@ func MulBy3(x *Element) //go:noescape func MulBy5(x *Element) +//go:noescape +func MulBy13(x *Element) + //go:noescape func add(res, x, y *Element) @@ -42,3 +45,6 @@ func fromMont(res *Element) //go:noescape func reduce(res *Element) + +//go:noescape +func Butterfly(a, b *Element) diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_amd64.s b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_amd64.s index 83133a8f5eb..d5dca83d259 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_amd64.s +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_amd64.s @@ -233,3 +233,108 @@ TEXT ·MulBy5(SB), NOSPLIT, $0-8 MOVQ BX, 16(AX) MOVQ SI, 24(AX) RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX 
+ MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) + + MOVQ DX, R11 + MOVQ CX, R12 + MOVQ BX, R13 + MOVQ SI, R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ R11, DX + ADCQ R12, CX + ADCQ R13, BX + ADCQ R14, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), NOSPLIT, $0-16 + MOVQ a+0(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ CX, R8 + MOVQ BX, R9 + MOVQ SI, R10 + MOVQ DI, R11 + XORQ AX, AX + MOVQ b+8(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + SUBQ 0(DX), R8 + SBBQ 8(DX), R9 + SBBQ 16(DX), R10 + SBBQ 24(DX), R11 + MOVQ $0x43e1f593f0000001, R12 + MOVQ $0x2833e84879b97091, R13 + MOVQ $0xb85045b68181585d, R14 + MOVQ $0x30644e72e131a029, R15 + CMOVQCC AX, R12 + CMOVQCC AX, R13 + CMOVQCC AX, R14 + CMOVQCC AX, R15 + ADDQ R12, R8 + ADCQ R13, R9 + ADCQ R14, R10 + ADCQ R15, R11 + MOVQ R8, 0(DX) + MOVQ R9, 8(DX) + MOVQ R10, 16(DX) + MOVQ R11, 24(DX) + + // reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11) + + MOVQ a+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) 
+ RET diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_noasm.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_noasm.go index e7daa4d40ee..ec1fac18d63 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_noasm.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/element_ops_noasm.go @@ -1,3 +1,4 @@ +//go:build !amd64 // +build !amd64 // Copyright 2020 ConsenSys Software Inc. @@ -34,6 +35,18 @@ func MulBy5(x *Element) { mulByConstant(x, 5) } +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + func mul(z, x, y *Element) { _mulGeneric(z, x, y) } diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/doc.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/doc.go new file mode 100644 index 00000000000..497bd40a972 --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package mimc provides MiMC hash function using Miyaguchi–Preneel construction. 
+package mimc diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/fuzz.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/fuzz.go new file mode 100644 index 00000000000..41b557cf3f7 --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/fuzz.go @@ -0,0 +1,34 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package mimc + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + var s []byte + h := NewMiMC(string(data)) + h.Write(data) + h.Sum(s) + return fuzzNormal +} diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/mimc.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/mimc.go new file mode 100644 index 00000000000..22492ad185f --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc/mimc.go @@ -0,0 +1,174 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package mimc + +import ( + "hash" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bn254/fr" + "golang.org/x/crypto/sha3" +) + +const mimcNbRounds = 91 + +// BlockSize size that mimc consumes +const BlockSize = fr.Bytes + +// Params constants for the mimc hash function +type Params []fr.Element + +// NewParams creates new mimc object +func NewParams(seed string) Params { + + // set the constants + res := make(Params, mimcNbRounds) + + rnd := sha3.Sum256([]byte(seed)) + value := new(big.Int).SetBytes(rnd[:]) + + for i := 0; i < mimcNbRounds; i++ { + rnd = sha3.Sum256(value.Bytes()) + value.SetBytes(rnd[:]) + res[i].SetBigInt(value) + } + + return res +} + +// digest represents the partial evaluation of the checksum +// along with the params of the mimc function +type digest struct { + Params Params + h fr.Element + data []byte // data to hash +} + +// NewMiMC returns a MiMCImpl object, pure-go reference implementation +func NewMiMC(seed string) hash.Hash { + d := new(digest) + params := NewParams(seed) + //d.Reset() + d.Params = params + d.Reset() + return d +} + +// Reset resets the Hash to its initial state. +func (d *digest) Reset() { + d.data = nil + d.h = fr.Element{0, 0, 0, 0} +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (d *digest) Sum(b []byte) []byte { + buffer := d.checksum() + d.data = nil // flush the data already hashed + hash := buffer.Bytes() + b = append(b, hash[:]...) 
+ return b +} + +// BlockSize returns the hash's underlying block size. +// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. +func (d *digest) Size() int { + return BlockSize +} + +// BlockSize returns the number of bytes Sum will return. +func (d *digest) BlockSize() int { + return BlockSize +} + +// Write (via the embedded io.Writer interface) adds more data to the running hash. +// It never returns an error. +func (d *digest) Write(p []byte) (n int, err error) { + n = len(p) + d.data = append(d.data, p...) + return +} + +// Hash hash using Miyaguchi–Preneel: +// https://en.wikipedia.org/wiki/One-way_compression_function +// The XOR operation is replaced by field addition, data is in Montgomery form +func (d *digest) checksum() fr.Element { + + var buffer [BlockSize]byte + var x fr.Element + + // if data size is not multiple of BlockSizes we padd: + // .. || 0xaf8 -> .. || 0x0000...0af8 + if len(d.data)%BlockSize != 0 { + q := len(d.data) / BlockSize + r := len(d.data) % BlockSize + sliceq := make([]byte, q*BlockSize) + copy(sliceq, d.data) + slicer := make([]byte, r) + copy(slicer, d.data[q*BlockSize:]) + sliceremainder := make([]byte, BlockSize-r) + d.data = append(sliceq, sliceremainder...) + d.data = append(d.data, slicer...) + } + + if len(d.data) == 0 { + d.data = make([]byte, 32) + } + + nbChunks := len(d.data) / BlockSize + + for i := 0; i < nbChunks; i++ { + copy(buffer[:], d.data[i*BlockSize:(i+1)*BlockSize]) + x.SetBytes(buffer[:]) + d.encrypt(x) + d.h.Add(&x, &d.h) + } + + return d.h +} + +// plain execution of a mimc run +// m: message +// k: encryption key +func (d *digest) encrypt(m fr.Element) { + + for i := 0; i < len(d.Params); i++ { + // m = (m+k+c)^5 + var tmp fr.Element + tmp.Add(&m, &d.h).Add(&tmp, &d.Params[i]) + m.Square(&tmp). + Square(&m). 
+ Mul(&m, &tmp) + } + m.Add(&m, &d.h) + d.h = m +} + +// Sum computes the mimc hash of msg from seed +func Sum(seed string, msg []byte) ([]byte, error) { + params := NewParams(seed) + var d digest + d.Params = params + if _, err := d.Write(msg); err != nil { + return nil, err + } + h := d.checksum() + bytes := h.Bytes() + return bytes[:], nil +} diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fuzz.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fuzz.go new file mode 100644 index 00000000000..69b9c450507 --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/fuzz.go @@ -0,0 +1,76 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bn254 + +import ( + "bytes" + "github.com/consensys/gnark-crypto/ecc/bn254/fp" + "github.com/consensys/gnark-crypto/ecc/bn254/fr" + "github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc" + "math/big" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + // TODO separate in multiple FuzzXXX and update continuous fuzzer scripts + // else, we don't really benefits for fuzzer strategy. 
+ fr.Fuzz(data) + fp.Fuzz(data) + mimc.Fuzz(data) + + // fuzz pairing + r := bytes.NewReader(data) + var e1, e2 fr.Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + var r, r1, r2, r1r2, zero GT + var b1, b2, b1b2 big.Int + e1.ToBigIntRegular(&b1) + e2.ToBigIntRegular(&b2) + b1b2.Mul(&b1, &b2) + + var p1 G1Affine + var p2 G2Affine + + p1.ScalarMultiplication(&g1GenAff, &b1) + p2.ScalarMultiplication(&g2GenAff, &b2) + + r, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + r1, _ = Pair([]G1Affine{p1}, []G2Affine{g2GenAff}) + r2, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{p2}) + + r1r2.Exp(&r, b1b2) + r1.Exp(&r1, b2) + r2.Exp(&r2, b1) + + if !(r1r2.Equal(&r1) && r1r2.Equal(&r2) && !r.Equal(&zero)) { + panic("pairing bilinearity check failed") + } + } + + return fuzzNormal +} diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/g1.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/g1.go index 787bdc0411e..8a5f823d00e 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/g1.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/g1.go @@ -17,7 +17,9 @@ package bn254 import ( + "math" "math/big" + "runtime" "github.com/consensys/gnark-crypto/ecc" "github.com/consensys/gnark-crypto/ecc/bn254/fp" @@ -58,6 +60,30 @@ func (p *G1Affine) ScalarMultiplication(a *G1Affine, s *big.Int) *G1Affine { return p } +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G1Affine) Add(a, b *G1Affine) *G1Affine { + var p1, p2 G1Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
+// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G1Affine) Sub(a, b *G1Affine) *G1Affine { + var p1, p2 G1Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.SubAssign(&p2) + p.FromJacobian(&p1) + return p +} + // Equal tests if two points (in Affine coordinates) are equal func (p *G1Affine) Equal(a *G1Affine) bool { return p.X.Equal(&a.X) && p.Y.Equal(&a.Y) @@ -112,7 +138,7 @@ func (p *G1Affine) IsOnCurve() bool { func (p *G1Affine) IsInSubGroup() bool { var _p G1Jac _p.FromAffine(p) - return _p.IsOnCurve() && _p.IsInSubGroup() + return _p.IsInSubGroup() } // ------------------------------------------------------------------------------------------------- @@ -391,7 +417,6 @@ func (p *G1Jac) phi(a *G1Jac) *G1Jac { func (p *G1Jac) mulGLV(a *G1Jac, s *big.Int) *G1Jac { var table [15]G1Jac - var zero big.Int var res G1Jac var k1, k2 fr.Element @@ -404,11 +429,11 @@ func (p *G1Jac) mulGLV(a *G1Jac, s *big.Int) *G1Jac { // split the scalar, modifies +-a, phi(a) accordingly k := ecc.SplitScalar(s, &glvBasis) - if k[0].Cmp(&zero) == -1 { + if k[0].Sign() == -1 { k[0].Neg(&k[0]) table[0].Neg(&table[0]) } - if k[1].Cmp(&zero) == -1 { + if k[1].Sign() == -1 { k[1].Neg(&k[1]) table[3].Neg(&table[3]) } @@ -434,7 +459,7 @@ func (p *G1Jac) mulGLV(a *G1Jac, s *big.Int) *G1Jac { k2.SetBigInt(&k[1]).FromMont() // loop starts from len(k1)/2 due to the bounds - for i := len(k1)/2 - 1; i >= 0; i-- { + for i := int(math.Ceil(fr.Limbs/2. 
- 1)); i >= 0; i-- { mask := uint64(3) << 62 for j := 0; j < 32; j++ { res.Double(&res).Double(&res) @@ -753,10 +778,10 @@ func (p *g1JacExtended) doubleMixed(q *G1Affine) *g1JacExtended { return p } -// BatchJacobianToAffineG1Affine converts points in Jacobian coordinates to Affine coordinates +// BatchJacobianToAffineG1 converts points in Jacobian coordinates to Affine coordinates // performing a single field inversion (Montgomery batch inversion trick) // result must be allocated with len(result) == len(points) -func BatchJacobianToAffineG1Affine(points []G1Jac, result []G1Affine) { +func BatchJacobianToAffineG1(points []G1Jac, result []G1Affine) { zeroes := make([]bool, len(points)) accumulator := fp.One() @@ -843,7 +868,7 @@ func BatchScalarMultiplicationG1(base *G1Affine, scalars []fr.Element) []G1Affin baseTable[i].AddMixed(base) } - pScalars := partitionScalars(scalars, c) + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) // compute offset and word selector / shift to select the right bits of our windows selectors := make([]selector, nbChunks) @@ -863,7 +888,7 @@ func BatchScalarMultiplicationG1(base *G1Affine, scalars []fr.Element) []G1Affin } // convert our base exp table into affine to use AddMixed baseTableAff := make([]G1Affine, (1 << (c - 1))) - BatchJacobianToAffineG1Affine(baseTable, baseTableAff) + BatchJacobianToAffineG1(baseTable, baseTableAff) toReturn := make([]G1Jac, len(scalars)) // for each digit, take value in the base table, double it c time, voila. 
@@ -906,6 +931,6 @@ func BatchScalarMultiplicationG1(base *G1Affine, scalars []fr.Element) []G1Affin } }) toReturnAff := make([]G1Affine, len(scalars)) - BatchJacobianToAffineG1Affine(toReturn, toReturnAff) + BatchJacobianToAffineG1(toReturn, toReturnAff) return toReturnAff } diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/g2.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/g2.go index a037b715136..6c918b5da62 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/g2.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/g2.go @@ -17,7 +17,9 @@ package bn254 import ( + "math" "math/big" + "runtime" "github.com/consensys/gnark-crypto/ecc" "github.com/consensys/gnark-crypto/ecc/bn254/fr" @@ -63,6 +65,30 @@ func (p *G2Affine) ScalarMultiplication(a *G2Affine, s *big.Int) *G2Affine { return p } +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Add(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
+// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Sub(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.SubAssign(&p2) + p.FromJacobian(&p1) + return p +} + // Equal tests if two points (in Affine coordinates) are equal func (p *G2Affine) Equal(a *G2Affine) bool { return p.X.Equal(&a.X) && p.Y.Equal(&a.Y) @@ -117,7 +143,7 @@ func (p *G2Affine) IsOnCurve() bool { func (p *G2Affine) IsInSubGroup() bool { var _p G2Jac _p.FromAffine(p) - return _p.IsOnCurve() && _p.IsInSubGroup() + return _p.IsInSubGroup() } // ------------------------------------------------------------------------------------------------- @@ -346,20 +372,12 @@ func (p *G2Jac) IsOnCurve() bool { } // IsInSubGroup returns true if p is on the r-torsion, false otherwise. -// Z[r,0]+Z[-lambdaG2Affine, 1] is the kernel -// of (u,v)->u+lambdaG2Affinev mod r. Expressing r, lambdaG2Affine as -// polynomials in x, a short vector of this Zmodule is -// (4x+2), (-12x**2+4*x). So we check that (4x+2)p+(-12x**2+4*x)phi(p) -// is the infinity. +// [r]P == 0 <==> Frob(P) == [6x^2]P func (p *G2Jac) IsInSubGroup() bool { - - var res, xphip, phip G2Jac - phip.phi(p) - xphip.ScalarMultiplication(&phip, &xGen) // x*phi(p) - res.Double(&xphip).AddAssign(&xphip) // 3x*phi(p) - res.AddAssign(&phip).SubAssign(p) // 3x*phi(p)+phi(p)-p - res.Double(&res).ScalarMultiplication(&res, &xGen) // 6x**2*phi(p)+2x*phi(p)-2x*p - res.SubAssign(p).Double(&res) // 12x**2*phi(p)+4x*phi(p)-4x*p-2p + var a, res G2Jac + a.psi(p) + res.ScalarMultiplication(p, &fixedCoeff). 
+ SubAssign(&a) return res.IsOnCurve() && res.Z.IsZero() @@ -416,7 +434,6 @@ func (p *G2Jac) phi(a *G2Jac) *G2Jac { func (p *G2Jac) mulGLV(a *G2Jac, s *big.Int) *G2Jac { var table [15]G2Jac - var zero big.Int var res G2Jac var k1, k2 fr.Element @@ -429,11 +446,11 @@ func (p *G2Jac) mulGLV(a *G2Jac, s *big.Int) *G2Jac { // split the scalar, modifies +-a, phi(a) accordingly k := ecc.SplitScalar(s, &glvBasis) - if k[0].Cmp(&zero) == -1 { + if k[0].Sign() == -1 { k[0].Neg(&k[0]) table[0].Neg(&table[0]) } - if k[1].Cmp(&zero) == -1 { + if k[1].Sign() == -1 { k[1].Neg(&k[1]) table[3].Neg(&table[3]) } @@ -459,7 +476,7 @@ func (p *G2Jac) mulGLV(a *G2Jac, s *big.Int) *G2Jac { k2.SetBigInt(&k[1]).FromMont() // loop starts from len(k1)/2 due to the bounds - for i := len(k1)/2 - 1; i >= 0; i-- { + for i := int(math.Ceil(fr.Limbs/2. - 1)); i >= 0; i-- { mask := uint64(3) << 62 for j := 0; j < 32; j++ { res.Double(&res).Double(&res) @@ -477,7 +494,7 @@ func (p *G2Jac) mulGLV(a *G2Jac, s *big.Int) *G2Jac { return p } -// ClearCofactor ... +// ClearCofactor maps a point in curve to r-torsion func (p *G2Affine) ClearCofactor(a *G2Affine) *G2Affine { var _p G2Jac _p.FromAffine(a) @@ -486,7 +503,7 @@ func (p *G2Affine) ClearCofactor(a *G2Affine) *G2Affine { return p } -// ClearCofactor ... 
+// ClearCofactor maps a point in curve to r-torsion func (p *G2Jac) ClearCofactor(a *G2Jac) *G2Jac { // cf http://cacr.uwaterloo.ca/techreports/2011/cacr2011-26.pdf, 6.1 var points [4]G2Jac @@ -822,6 +839,13 @@ func (p *g2Proj) Set(a *g2Proj) *g2Proj { return p } +// Neg computes -G +func (p *g2Proj) Neg(a *g2Proj) *g2Proj { + *p = *a + p.y.Neg(&a.y) + return p +} + // FromJacobian converts a point from Jacobian to projective coordinates func (p *g2Proj) FromJacobian(Q *G2Jac) *g2Proj { var buf fptower.E2 @@ -890,7 +914,7 @@ func BatchScalarMultiplicationG2(base *G2Affine, scalars []fr.Element) []G2Affin baseTable[i].AddMixed(base) } - pScalars := partitionScalars(scalars, c) + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) // compute offset and word selector / shift to select the right bits of our windows selectors := make([]selector, nbChunks) diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/asm.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/asm.go index c7bb911dfb9..0ec192019d8 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/asm.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/asm.go @@ -1,3 +1,4 @@ +//go:build !noadx // +build !noadx // Copyright 2020 ConsenSys Software Inc. diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/asm_noadx.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/asm_noadx.go index f09b13900c3..6a09c11c492 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/asm_noadx.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/asm_noadx.go @@ -1,3 +1,4 @@ +//go:build noadx // +build noadx // Copyright 2020 ConsenSys Software Inc. 
diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e12.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e12.go index ef2c170e43b..3f8c763fb80 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e12.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e12.go @@ -20,7 +20,6 @@ import ( "encoding/binary" "errors" "github.com/consensys/gnark-crypto/ecc/bn254/fp" - "github.com/consensys/gnark-crypto/ecc/bn254/fr" "math/big" ) @@ -135,7 +134,186 @@ func (z *E12) Square(x *E12) *E12 { return z } -// CyclotomicSquare https://eprint.iacr.org/2009/565.pdf, 3.2 +// Karabina's compressed cyclotomic square +// https://eprint.iacr.org/2010/542.pdf +// Th. 3.2 with minor modifications to fit our tower +func (z *E12) CyclotomicSquareCompressed(x *E12) *E12 { + + var t [7]E2 + + // t0 = g1^2 + t[0].Square(&x.C0.B1) + // t1 = g5^2 + t[1].Square(&x.C1.B2) + // t5 = g1 + g5 + t[5].Add(&x.C0.B1, &x.C1.B2) + // t2 = (g1 + g5)^2 + t[2].Square(&t[5]) + + // t3 = g1^2 + g5^2 + t[3].Add(&t[0], &t[1]) + // t5 = 2 * g1 * g5 + t[5].Sub(&t[2], &t[3]) + + // t6 = g3 + g2 + t[6].Add(&x.C1.B0, &x.C0.B2) + // t3 = (g3 + g2)^2 + t[3].Square(&t[6]) + // t2 = g3^2 + t[2].Square(&x.C1.B0) + + // t6 = 2 * nr * g1 * g5 + t[6].MulByNonResidue(&t[5]) + // t5 = 4 * nr * g1 * g5 + 2 * g3 + t[5].Add(&t[6], &x.C1.B0). 
+ Double(&t[5]) + // z3 = 6 * nr * g1 * g5 + 2 * g3 + z.C1.B0.Add(&t[5], &t[6]) + + // t4 = nr * g5^2 + t[4].MulByNonResidue(&t[1]) + // t5 = nr * g5^2 + g1^2 + t[5].Add(&t[0], &t[4]) + // t6 = nr * g5^2 + g1^2 - g2 + t[6].Sub(&t[5], &x.C0.B2) + + // t1 = g2^2 + t[1].Square(&x.C0.B2) + + // t6 = 2 * nr * g5^2 + 2 * g1^2 - 2*g2 + t[6].Double(&t[6]) + // z2 = 3 * nr * g5^2 + 3 * g1^2 - 2*g2 + z.C0.B2.Add(&t[6], &t[5]) + + // t4 = nr * g2^2 + t[4].MulByNonResidue(&t[1]) + // t5 = g3^2 + nr * g2^2 + t[5].Add(&t[2], &t[4]) + // t6 = g3^2 + nr * g2^2 - g1 + t[6].Sub(&t[5], &x.C0.B1) + // t6 = 2 * g3^2 + 2 * nr * g2^2 - 2 * g1 + t[6].Double(&t[6]) + // z1 = 3 * g3^2 + 3 * nr * g2^2 - 2 * g1 + z.C0.B1.Add(&t[6], &t[5]) + + // t0 = g2^2 + g3^2 + t[0].Add(&t[2], &t[1]) + // t5 = 2 * g3 * g2 + t[5].Sub(&t[3], &t[0]) + // t6 = 2 * g3 * g2 + g5 + t[6].Add(&t[5], &x.C1.B2) + // t6 = 4 * g3 * g2 + 2 * g5 + t[6].Double(&t[6]) + // z5 = 6 * g3 * g2 + 2 * g5 + z.C1.B2.Add(&t[5], &t[6]) + + return z +} + +// Decompress Karabina's cyclotomic square result +func (z *E12) Decompress(x *E12) *E12 { + + var t [3]E2 + var one E2 + one.SetOne() + + // t0 = g1^2 + t[0].Square(&x.C0.B1) + // t1 = 3 * g1^2 - 2 * g2 + t[1].Sub(&t[0], &x.C0.B2). + Double(&t[1]). + Add(&t[1], &t[0]) + // t0 = E * g5^2 + t1 + t[2].Square(&x.C1.B2) + t[0].MulByNonResidue(&t[2]). + Add(&t[0], &t[1]) + // t1 = 1/(4 * g3) + t[1].Double(&x.C1.B0). + Double(&t[1]). + Inverse(&t[1]) // costly + // z4 = g4 + z.C1.B1.Mul(&t[0], &t[1]) + + // t1 = g2 * g1 + t[1].Mul(&x.C0.B2, &x.C0.B1) + // t2 = 2 * g4^2 - 3 * g2 * g1 + t[2].Square(&z.C1.B1). + Sub(&t[2], &t[1]). + Double(&t[2]). + Sub(&t[2], &t[1]) + // t1 = g3 * g5 + t[1].Mul(&x.C1.B0, &x.C1.B2) + // c_0 = E * (2 * g4^2 + g3 * g5 - 3 * g2 * g1) + 1 + t[2].Add(&t[2], &t[1]) + z.C0.B0.MulByNonResidue(&t[2]). 
+ Add(&z.C0.B0, &one) + + z.C0.B1.Set(&x.C0.B1) + z.C0.B2.Set(&x.C0.B2) + z.C1.B0.Set(&x.C1.B0) + z.C1.B2.Set(&x.C1.B2) + + return z +} + +// BatchDecompress multiple Karabina's cyclotomic square results +func BatchDecompress(x []E12) []E12 { + + n := len(x) + if n == 0 { + return x + } + + t0 := make([]E2, n) + t1 := make([]E2, n) + t2 := make([]E2, n) + + var one E2 + one.SetOne() + + for i := 0; i < n; i++ { + // t0 = g1^2 + t0[i].Square(&x[i].C0.B1) + // t1 = 3 * g1^2 - 2 * g2 + t1[i].Sub(&t0[i], &x[i].C0.B2). + Double(&t1[i]). + Add(&t1[i], &t0[i]) + // t0 = E * g5^2 + t1 + t2[i].Square(&x[i].C1.B2) + t0[i].MulByNonResidue(&t2[i]). + Add(&t0[i], &t1[i]) + // t1 = 4 * g3 + t1[i].Double(&x[i].C1.B0). + Double(&t1[i]) + } + + t1 = BatchInvert(t1) // costs 1 inverse + + for i := 0; i < n; i++ { + // z4 = g4 + x[i].C1.B1.Mul(&t0[i], &t1[i]) + + // t1 = g2 * g1 + t1[i].Mul(&x[i].C0.B2, &x[i].C0.B1) + // t2 = 2 * g4^2 - 3 * g2 * g1 + t2[i].Square(&x[i].C1.B1) + t2[i].Sub(&t2[i], &t1[i]) + t2[i].Double(&t2[i]) + t2[i].Sub(&t2[i], &t1[i]) + + // t1 = g3 * g5 + t1[i].Mul(&x[i].C1.B0, &x[i].C1.B2) + // z0 = E * (2 * g4^2 + g3 * g5 - 3 * g2 * g1) + 1 + t2[i].Add(&t2[i], &t1[i]) + x[i].C0.B0.MulByNonResidue(&t2[i]). + Add(&x[i].C0.B0, &one) + } + + return x +} + +// Granger-Scott's cyclotomic square +// https://eprint.iacr.org/2009/565.pdf, 3.2 func (z *E12) CyclotomicSquare(x *E12) *E12 { // x=(x0,x1,x2,x3,x4,x5,x6,x7) in E2^6 @@ -336,20 +514,19 @@ func (z *E12) SetBytes(e []byte) error { z.C1.B2.A1.SetBytes(e[0 : 0+fp.Bytes]) - // TODO is it the right place? - //if !z.IsInSubGroup() { - // return errors.New("subgroup check failed") - //} - return nil } -var frModulus = fr.Modulus() - // IsInSubGroup ensures GT/E12 is in correct sugroup func (z *E12) IsInSubGroup() bool { - var one, _z E12 - one.SetOne() - _z.Exp(z, *frModulus) - return _z.Equal(&one) + var a, b, _b E12 + + a.Frobenius(z) + b.Expt(z). + Expt(&b). 
+ CyclotomicSquare(&b) + _b.CyclotomicSquare(&b) + b.Mul(&b, &_b) + + return a.Equal(&b) } diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e12_pairing.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e12_pairing.go index 6e7d4d3b752..b29bc3fefcb 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e12_pairing.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e12_pairing.go @@ -1,130 +1,186 @@ package fptower -import ( - "math/bits" -) - -// MulByVW set z to x*(y*v*w) and return z -// here y*v*w means the E12 element with C1.B1=y and all other components 0 -func (z *E12) MulByVW(x *E12, y *E2) *E12 { - - var result E12 - var yNR E2 - - yNR.MulByNonResidue(y) - result.C0.B0.Mul(&x.C1.B1, &yNR) - result.C0.B1.Mul(&x.C1.B2, &yNR) - result.C0.B2.Mul(&x.C1.B0, y) - result.C1.B0.Mul(&x.C0.B2, &yNR) - result.C1.B1.Mul(&x.C0.B0, y) - result.C1.B2.Mul(&x.C0.B1, y) - z.Set(&result) - return z -} - -// MulByV set z to x*(y*v) and return z -// here y*v means the E12 element with C0.B1=y and all other components 0 -func (z *E12) MulByV(x *E12, y *E2) *E12 { - - var result E12 - var yNR E2 - - yNR.MulByNonResidue(y) - result.C0.B0.Mul(&x.C0.B2, &yNR) - result.C0.B1.Mul(&x.C0.B0, y) - result.C0.B2.Mul(&x.C0.B1, y) - result.C1.B0.Mul(&x.C1.B2, &yNR) - result.C1.B1.Mul(&x.C1.B0, y) - result.C1.B2.Mul(&x.C1.B1, y) - z.Set(&result) - return z +func (z *E12) nSquare(n int) { + for i := 0; i < n; i++ { + z.CyclotomicSquare(z) + } } -// MulByV2W set z to x*(y*v^2*w) and return z -// here y*v^2*w means the E12 element with C1.B2=y and all other components 0 -func (z *E12) MulByV2W(x *E12, y *E2) *E12 { - - var result E12 - var yNR E2 - - yNR.MulByNonResidue(y) - result.C0.B0.Mul(&x.C1.B0, &yNR) - result.C0.B1.Mul(&x.C1.B1, &yNR) - result.C0.B2.Mul(&x.C1.B2, &yNR) - result.C1.B0.Mul(&x.C0.B1, &yNR) - result.C1.B1.Mul(&x.C0.B2, &yNR) - result.C1.B2.Mul(&x.C0.B0, y) - 
z.Set(&result) - return z +func (z *E12) nSquareCompressed(n int) { + for i := 0; i < n; i++ { + z.CyclotomicSquareCompressed(z) + } } -// Expt set z to x^t in E12 and return z (t is the generator of the BN curve) +// Expt set z to x^t in E12 and return z (t is the generator of the curve) func (z *E12) Expt(x *E12) *E12 { + // Expt computation is derived from the addition chain: + // + // _10 = 2*1 + // _100 = 2*_10 + // _1000 = 2*_100 + // _10000 = 2*_1000 + // _10001 = 1 + _10000 + // _10011 = _10 + _10001 + // _10100 = 1 + _10011 + // _11001 = _1000 + _10001 + // _100010 = 2*_10001 + // _100111 = _10011 + _10100 + // _101001 = _10 + _100111 + // i27 = (_100010 << 6 + _100 + _11001) << 7 + _11001 + // i44 = (i27 << 8 + _101001 + _10) << 6 + _10001 + // i70 = ((i44 << 8 + _101001) << 6 + _101001) << 10 + // return (_100111 + i70) << 6 + _101001 + _1000 + // + // Operations: 62 squares 17 multiplies + // + // Generated by github.com/mmcloughlin/addchain v0.4.0. - const tAbsVal uint64 = 4965661367192848881 + // Allocate Temporaries. 
+ var result, t0, t1, t2, t3, t4, t5, t6 E12 - var result E12 - result.Set(x) + // Step 1: t3 = x^0x2 + t3.CyclotomicSquare(x) - l := bits.Len64(tAbsVal) - 2 - for i := l; i >= 0; i-- { - result.CyclotomicSquare(&result) - if tAbsVal&(1<= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_adx_amd64.s b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_adx_amd64.s index cda60cdbb8d..aabff96ad1a 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_adx_amd64.s +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_adx_amd64.s @@ -330,10 +330,10 @@ TEXT ·negE2(SB), NOSPLIT, $0-16 ORQ R8, AX TESTQ AX, AX JNE l1 - MOVQ AX, 32(DX) - MOVQ AX, 40(DX) - MOVQ AX, 48(DX) - MOVQ AX, 56(DX) + MOVQ AX, 0(DX) + MOVQ AX, 8(DX) + MOVQ AX, 16(DX) + MOVQ AX, 24(DX) JMP l3 l1: diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_amd64.s b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_amd64.s index 48a1035bae4..d0c8e8a3d9c 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_amd64.s +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_amd64.s @@ -330,10 +330,10 @@ TEXT ·negE2(SB), NOSPLIT, $0-16 ORQ R8, AX TESTQ AX, AX JNE l1 - MOVQ AX, 32(DX) - MOVQ AX, 40(DX) - MOVQ AX, 48(DX) - MOVQ AX, 56(DX) + MOVQ AX, 0(DX) + MOVQ AX, 8(DX) + MOVQ AX, 16(DX) + MOVQ AX, 24(DX) JMP l3 l1: diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_bn254_fallback.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_bn254_fallback.go index 8c04804b23d..467a96f77eb 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_bn254_fallback.go +++ 
b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_bn254_fallback.go @@ -1,3 +1,4 @@ +//go:build !amd64 // +build !amd64 // Copyright 2020 ConsenSys AG diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_fallback.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_fallback.go index eada4c6b580..0ce4d833347 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_fallback.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e2_fallback.go @@ -1,3 +1,4 @@ +//go:build !amd64 // +build !amd64 // Copyright 2020 ConsenSys Software Inc. diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e6.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e6.go index 5ba35cf0207..adc33ceefd5 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e6.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower/e6.go @@ -123,6 +123,70 @@ func (z *E6) MulByNonResidue(x *E6) *E6 { return z } +// MulByE2 multiplies an element in E6 by an element in E2 +func (z *E6) MulByE2(x *E6, y *E2) *E6 { + var yCopy E2 + yCopy.Set(y) + z.B0.Mul(&x.B0, &yCopy) + z.B1.Mul(&x.B1, &yCopy) + z.B2.Mul(&x.B2, &yCopy) + return z +} + +// MulBy01 multiplication by sparse element (c0,c1,0) +func (z *E6) MulBy01(c0, c1 *E2) *E6 { + + var a, b, tmp, t0, t1, t2 E2 + + a.Mul(&z.B0, c0) + b.Mul(&z.B1, c1) + + tmp.Add(&z.B1, &z.B2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + t0.Add(&t0, &a) + + tmp.Add(&z.B0, &z.B2) + t2.Mul(c0, &tmp) + t2.Sub(&t2, &a) + t2.Add(&t2, &b) + + t1.Add(c0, c1) + tmp.Add(&z.B0, &z.B1) + t1.Mul(&t1, &tmp) + t1.Sub(&t1, &a) + t1.Sub(&t1, &b) + + z.B0.Set(&t0) + z.B1.Set(&t1) + z.B2.Set(&t2) + + return z +} + +// MulBy1 multiplication of E6 by sparse element (0, c1, 0) +func (z *E6) MulBy1(c1 *E2) *E6 { + + var b, tmp, t0, t1 E2 + b.Mul(&z.B1, c1) + + 
tmp.Add(&z.B1, &z.B2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + + tmp.Add(&z.B0, &z.B1) + t1.Mul(c1, &tmp) + t1.Sub(&t1, &b) + + z.B0.Set(&t0) + z.B1.Set(&t1) + z.B2.Set(&b) + + return z +} + // Mul sets z to the E6 product of x,y, returns z func (z *E6) Mul(x, y *E6) *E6 { // Algorithm 13 from https://eprint.iacr.org/2010/354.pdf diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/marshal.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/marshal.go index 68a0b40523d..08194ed7930 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/marshal.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/marshal.go @@ -14,7 +14,6 @@ // Code generated by consensys/gnark-crypto DO NOT EDIT -// Package bn254 provides efficient elliptic curve and pairing implementation for bn254 package bn254 import ( @@ -54,14 +53,21 @@ type Encoder struct { // Decoder reads bn254 object values from an inbound stream type Decoder struct { - r io.Reader - n int64 // read bytes + r io.Reader + n int64 // read bytes + subGroupCheck bool // default to true } // NewDecoder returns a binary decoder supporting curve bn254 objects in both // compressed and uncompressed (raw) forms -func NewDecoder(r io.Reader) *Decoder { - return &Decoder{r: r} +func NewDecoder(r io.Reader, options ...func(*Decoder)) *Decoder { + d := &Decoder{r: r, subGroupCheck: true} + + for _, o := range options { + o(d) + } + + return d } // Decode reads the binary encoding of v from the stream @@ -73,35 +79,69 @@ func (dec *Decoder) Decode(v interface{}) (err error) { } // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // in particular, careful attention must be given to usage of Bytes() method on Elements and Points + // that return an array (not a slice) of bytes. Using this is beneficial to minimize memallocs + // in very large (de)serialization upstream in gnark. 
+ // (but detrimental to code lisibility here) // TODO double check memory usage and factorize this var buf [SizeOfG2AffineUncompressed]byte var read int switch t := v.(type) { - case *uint64: - var r uint64 - r, err = dec.readUint64() + case *fr.Element: + read, err = io.ReadFull(dec.r, buf[:fr.Bytes]) + dec.n += int64(read) if err != nil { return } - *t = r + t.SetBytes(buf[:fr.Bytes]) return - case *fr.Element: - read, err = io.ReadFull(dec.r, buf[:fr.Limbs*8]) + case *fp.Element: + read, err = io.ReadFull(dec.r, buf[:fp.Bytes]) dec.n += int64(read) if err != nil { return } - t.SetBytes(buf[:fr.Limbs*8]) + t.SetBytes(buf[:fp.Bytes]) return - case *fp.Element: - read, err = io.ReadFull(dec.r, buf[:fp.Limbs*8]) - dec.n += int64(read) + case *[]fr.Element: + var sliceLen uint32 + sliceLen, err = dec.readUint32() if err != nil { return } - t.SetBytes(buf[:fp.Limbs*8]) + if len(*t) != int(sliceLen) { + *t = make([]fr.Element, sliceLen) + } + + for i := 0; i < len(*t); i++ { + read, err = io.ReadFull(dec.r, buf[:fr.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + (*t)[i].SetBytes(buf[:fr.Bytes]) + } + return + case *[]fp.Element: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]fp.Element, sliceLen) + } + + for i := 0; i < len(*t); i++ { + read, err = io.ReadFull(dec.r, buf[:fp.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + (*t)[i].SetBytes(buf[:fp.Bytes]) + } return case *G1Affine: // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. @@ -121,7 +161,7 @@ func (dec *Decoder) Decode(v interface{}) (err error) { return } } - _, err = t.SetBytes(buf[:nbBytes]) + _, err = t.setBytes(buf[:nbBytes], dec.subGroupCheck) return case *G2Affine: // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. 
@@ -141,7 +181,7 @@ func (dec *Decoder) Decode(v interface{}) (err error) { return } } - _, err = t.SetBytes(buf[:nbBytes]) + _, err = t.setBytes(buf[:nbBytes], dec.subGroupCheck) return case *[]G1Affine: var sliceLen uint32 @@ -171,7 +211,7 @@ func (dec *Decoder) Decode(v interface{}) (err error) { if err != nil { return } - _, err = (*t)[i].SetBytes(buf[:nbBytes]) + _, err = (*t)[i].setBytes(buf[:nbBytes], false) if err != nil { return } @@ -183,7 +223,11 @@ func (dec *Decoder) Decode(v interface{}) (err error) { parallel.Execute(len(compressed), func(start, end int) { for i := start; i < end; i++ { if compressed[i] { - if err := (*t)[i].unsafeComputeY(); err != nil { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { atomic.AddUint64(&nbErrs, 1) } } @@ -222,7 +266,7 @@ func (dec *Decoder) Decode(v interface{}) (err error) { if err != nil { return } - _, err = (*t)[i].SetBytes(buf[:nbBytes]) + _, err = (*t)[i].setBytes(buf[:nbBytes], false) if err != nil { return } @@ -234,7 +278,11 @@ func (dec *Decoder) Decode(v interface{}) (err error) { parallel.Execute(len(compressed), func(start, end int) { for i := start; i < end; i++ { if compressed[i] { - if err := (*t)[i].unsafeComputeY(); err != nil { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { atomic.AddUint64(&nbErrs, 1) } } @@ -246,7 +294,15 @@ func (dec *Decoder) Decode(v interface{}) (err error) { return nil default: - return errors.New("bn254 encoder: unsupported type") + n := binary.Size(t) + if n == -1 { + return errors.New("bn254 encoder: unsupported type") + } + err = binary.Read(dec.r, binary.BigEndian, t) + if err == nil { + dec.n += int64(n) + } + return } } @@ -255,18 +311,6 @@ func (dec *Decoder) BytesRead() int64 { return dec.n } -func (dec *Decoder) readUint64() (r 
uint64, err error) { - var read int - var buf [8]byte - read, err = io.ReadFull(dec.r, buf[:8]) - dec.n += int64(read) - if err != nil { - return - } - r = binary.BigEndian.Uint64(buf[:8]) - return -} - func (dec *Decoder) readUint32() (r uint32, err error) { var read int var buf [4]byte @@ -323,6 +367,14 @@ func RawEncoding() func(*Encoder) { } } +// NoSubgroupChecks returns an option to use in NewDecoder(...) which disable subgroup checks on the points +// the decoder will read. Use with caution, as crafted points from an untrusted source can lead to crypto-attacks. +func NoSubgroupChecks() func(*Decoder) { + return func(dec *Decoder) { + dec.subGroupCheck = false + } +} + func (enc *Encoder) encode(v interface{}) (err error) { // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap @@ -330,10 +382,6 @@ func (enc *Encoder) encode(v interface{}) (err error) { var written int switch t := v.(type) { - case uint64: - err = binary.Write(enc.w, binary.BigEndian, t) - enc.n += 8 - return case *fr.Element: buf := t.Bytes() written, err = enc.w.Write(buf[:]) @@ -354,6 +402,41 @@ func (enc *Encoder) encode(v interface{}) (err error) { written, err = enc.w.Write(buf[:]) enc.n += int64(written) return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G1Affine: // write slice length err = 
binary.Write(enc.w, binary.BigEndian, uint32(len(t))) @@ -393,7 +476,13 @@ func (enc *Encoder) encode(v interface{}) (err error) { } return nil default: - return errors.New(" encoder: unsupported type") + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return } } @@ -404,10 +493,6 @@ func (enc *Encoder) encodeRaw(v interface{}) (err error) { var written int switch t := v.(type) { - case uint64: - err = binary.Write(enc.w, binary.BigEndian, t) - enc.n += 8 - return case *fr.Element: buf := t.Bytes() written, err = enc.w.Write(buf[:]) @@ -428,6 +513,41 @@ func (enc *Encoder) encodeRaw(v interface{}) (err error) { written, err = enc.w.Write(buf[:]) enc.n += int64(written) return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G1Affine: // write slice length err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) @@ -467,7 +587,13 @@ func (enc *Encoder) encodeRaw(v interface{}) (err error) { } return nil default: - return errors.New(" encoder: unsupported type") + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return } } @@ -573,6 +699,10 @@ func (p *G1Affine) RawBytes() (res [SizeOfG1AffineUncompressed]byte) { // 
the Y coordinate (i.e the square root doesn't exist) this function retunrs an error // this check if the resulting point is on the curve and in the correct subgroup func (p *G1Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G1Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { if len(buf) < SizeOfG1AffineCompressed { return 0, io.ErrShortBuffer } @@ -601,7 +731,7 @@ func (p *G1Affine) SetBytes(buf []byte) (int, error) { p.Y.SetBytes(buf[fp.Bytes : fp.Bytes*2]) // subgroup check - if !p.IsInSubGroup() { + if subGroupCheck && !p.IsInSubGroup() { return 0, errors.New("invalid point: subgroup check failed") } @@ -643,7 +773,7 @@ func (p *G1Affine) SetBytes(buf []byte) (int, error) { p.Y = Y // subgroup check - if !p.IsInSubGroup() { + if subGroupCheck && !p.IsInSubGroup() { return 0, errors.New("invalid point: subgroup check failed") } @@ -652,7 +782,7 @@ func (p *G1Affine) SetBytes(buf []byte) (int, error) { // unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) // it computes the Y coordinate from the already set X coordinate and is compute intensive -func (p *G1Affine) unsafeComputeY() error { +func (p *G1Affine) unsafeComputeY(subGroupCheck bool) error { // stored in unsafeSetCompressedBytes mData := byte(p.Y[0]) @@ -681,7 +811,7 @@ func (p *G1Affine) unsafeComputeY() error { p.Y = Y // subgroup check - if !p.IsInSubGroup() { + if subGroupCheck && !p.IsInSubGroup() { return errors.New("invalid point: subgroup check failed") } @@ -764,7 +894,7 @@ func (p *G2Affine) Bytes() (res [SizeOfG2AffineCompressed]byte) { } // we store X and mask the most significant word with our metadata mask - // p.X.A0 | p.X.A1 + // p.X.A1 | p.X.A0 tmp = p.X.A0 tmp.FromMont() binary.BigEndian.PutUint64(res[56:64], tmp[0]) @@ -801,7 +931,7 @@ func (p *G2Affine) RawBytes() (res [SizeOfG2AffineUncompressed]byte) { // not compressed // we store the Y coordinate - // p.Y.A0 | p.Y.A1 + // p.Y.A1 | 
p.Y.A0 tmp = p.Y.A0 tmp.FromMont() binary.BigEndian.PutUint64(res[120:128], tmp[0]) @@ -817,7 +947,7 @@ func (p *G2Affine) RawBytes() (res [SizeOfG2AffineUncompressed]byte) { binary.BigEndian.PutUint64(res[64:72], tmp[3]) // we store X and mask the most significant word with our metadata mask - // p.X.A0 | p.X.A1 + // p.X.A1 | p.X.A0 tmp = p.X.A1 tmp.FromMont() binary.BigEndian.PutUint64(res[24:32], tmp[0]) @@ -844,6 +974,10 @@ func (p *G2Affine) RawBytes() (res [SizeOfG2AffineUncompressed]byte) { // the Y coordinate (i.e the square root doesn't exist) this function retunrs an error // this check if the resulting point is on the curve and in the correct subgroup func (p *G2Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G2Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { if len(buf) < SizeOfG2AffineCompressed { return 0, io.ErrShortBuffer } @@ -868,15 +1002,15 @@ func (p *G2Affine) SetBytes(buf []byte) (int, error) { // uncompressed point if mData == mUncompressed { // read X and Y coordinates - // p.X.A1 | p.X.A0 + // p.X.A1 | p.X.A0 p.X.A1.SetBytes(buf[:fp.Bytes]) p.X.A0.SetBytes(buf[fp.Bytes : fp.Bytes*2]) - // p.Y.A1 | p.Y.A0 + // p.Y.A1 | p.Y.A0 p.Y.A1.SetBytes(buf[fp.Bytes*2 : fp.Bytes*3]) p.Y.A0.SetBytes(buf[fp.Bytes*3 : fp.Bytes*4]) // subgroup check - if !p.IsInSubGroup() { + if subGroupCheck && !p.IsInSubGroup() { return 0, errors.New("invalid point: subgroup check failed") } @@ -893,7 +1027,7 @@ func (p *G2Affine) SetBytes(buf []byte) (int, error) { bufX[0] &= ^mMask // read X coordinate - // p.X.A1 | p.X.A0 + // p.X.A1 | p.X.A0 p.X.A1.SetBytes(bufX[:fp.Bytes]) p.X.A0.SetBytes(buf[fp.Bytes : fp.Bytes*2]) @@ -921,7 +1055,7 @@ func (p *G2Affine) SetBytes(buf []byte) (int, error) { p.Y = Y // subgroup check - if !p.IsInSubGroup() { + if subGroupCheck && !p.IsInSubGroup() { return 0, errors.New("invalid point: subgroup check failed") } @@ -930,7 +1064,7 @@ func (p *G2Affine) SetBytes(buf []byte) (int, 
error) { // unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) // it computes the Y coordinate from the already set X coordinate and is compute intensive -func (p *G2Affine) unsafeComputeY() error { +func (p *G2Affine) unsafeComputeY(subGroupCheck bool) error { // stored in unsafeSetCompressedBytes mData := byte(p.Y.A0[0]) @@ -960,7 +1094,7 @@ func (p *G2Affine) unsafeComputeY() error { p.Y = Y // subgroup check - if !p.IsInSubGroup() { + if subGroupCheck && !p.IsInSubGroup() { return errors.New("invalid point: subgroup check failed") } @@ -989,7 +1123,7 @@ func (p *G2Affine) unsafeSetCompressedBytes(buf []byte) (isInfinity bool) { bufX[0] &= ^mMask // read X coordinate - // p.X.A1 | p.X.A0 + // p.X.A1 | p.X.A0 p.X.A1.SetBytes(bufX[:fp.Bytes]) p.X.A0.SetBytes(buf[fp.Bytes : fp.Bytes*2]) diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/multiexp.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/multiexp.go index 89d958a575d..088fab69145 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/multiexp.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/multiexp.go @@ -17,33 +17,14 @@ package bn254 import ( + "errors" + "github.com/consensys/gnark-crypto/ecc" "github.com/consensys/gnark-crypto/ecc/bn254/fr" "github.com/consensys/gnark-crypto/internal/parallel" "math" "runtime" - "sync" ) -// CPUSemaphore enables users to set optional number of CPUs the multiexp will use -// this is thread safe and can be used accross parallel calls of MultiExp -type CPUSemaphore struct { - chCpus chan struct{} // semaphore to limit number of cpus iterating through points and scalrs at the same time - lock sync.Mutex -} - -// NewCPUSemaphore returns a new multiExp options to be used with MultiExp -// this option can be shared between different MultiExp calls and will ensure only numCpus are used -// through a semaphore -func NewCPUSemaphore(numCpus int) *CPUSemaphore { - toReturn := &CPUSemaphore{ - 
chCpus: make(chan struct{}, numCpus), - } - for i := 0; i < numCpus; i++ { - toReturn.chCpus <- struct{}{} - } - return toReturn -} - // selector stores the index, mask and shifts needed to select bits from a scalar // it is used during the multiExp algorithm or the batch scalar multiplication type selector struct { @@ -61,7 +42,10 @@ type selector struct { // 2^{c} to the current digit, making it negative. // negative digits can be processed in a later step as adding -G into the bucket instead of G // (computing -G is cheap, and this saves us half of the buckets in the MultiExp or BatchScalarMul) -func partitionScalars(scalars []fr.Element, c uint64) []fr.Element { +// scalarsMont indicates wheter the provided scalars are in montgomery form +// returns smallValues, which represent the number of scalars which meets the following condition +// 0 < scalar < 2^c (in other words, scalars where only the c-least significant bits are non zero) +func partitionScalars(scalars []fr.Element, c uint64, scalarsMont bool, nbTasks int) ([]fr.Element, int) { toReturn := make([]fr.Element, len(scalars)) // number of c-bit radixes in a scalar @@ -92,10 +76,36 @@ func partitionScalars(scalars []fr.Element, c uint64) []fr.Element { selectors[chunk] = d } + // for each chunk, we could track the number of non-zeros points we will need to process + // this way, if a chunk has more work to do than others, we can spawn off more go routines + // (at the cost of more buckets allocated) + // a simplified approach is to track the small values where only the first word is set + // if this number represent a significant number of points, then we will split first chunk + // processing in the msm in 2, to ensure all go routines finish at ~same time + // /!\ nbTasks is enough as parallel.Execute is not going to spawn more than nbTasks go routine + // if it does, though, this will deadlocK. 
+ chSmallValues := make(chan int, nbTasks) + parallel.Execute(len(scalars), func(start, end int) { + smallValues := 0 for i := start; i < end; i++ { var carry int + scalar := scalars[i] + if scalarsMont { + scalar.FromMont() + } + if scalar.IsUint64() { + // everything is 0, no need to process this scalar + if scalar[0] == 0 { + continue + } + // low c-bits are 1 in mask + if scalar[0]&mask == scalar[0] { + smallValues++ + } + } + // for each chunk in the scalar, compute the current digit, and an eventual carry for chunk := uint64(0); chunk < nbChunks; chunk++ { s := selectors[chunk] @@ -105,11 +115,16 @@ func partitionScalars(scalars []fr.Element, c uint64) []fr.Element { carry = 0 // digit = value of the c-bit window - digit += int((scalars[i][s.index] & s.mask) >> s.shift) + digit += int((scalar[s.index] & s.mask) >> s.shift) if s.multiWordSelect { // we are selecting bits over 2 words - digit += int(scalars[i][s.index+1]&s.maskHigh) << s.shiftHigh + digit += int(scalar[s.index+1]&s.maskHigh) << s.shiftHigh + } + + // if digit is zero, no impact on result + if digit == 0 { + continue } // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract @@ -133,24 +148,32 @@ func partitionScalars(scalars []fr.Element, c uint64) []fr.Element { } } - }) - return toReturn + + chSmallValues <- smallValues + + }, nbTasks) + + // aggregate small values + close(chSmallValues) + smallValues := 0 + for o := range chSmallValues { + smallValues += o + } + return toReturn, smallValues } // MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf -// optionally, takes as parameter a CPUSemaphore struct -// enabling to set max number of cpus to use -func (p *G1Affine) MultiExp(points []G1Affine, scalars []fr.Element, opts ...*CPUSemaphore) *G1Affine { +func (p *G1Affine) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Affine, error) { var _p G1Jac - _p.MultiExp(points, scalars, opts...) 
+ if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } p.FromJacobian(&_p) - return p + return p, nil } // MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf -// optionally, takes as parameter a CPUSemaphore struct -// enabling to set max number of cpus to use -func (p *G1Jac) MultiExp(points []G1Affine, scalars []fr.Element, opts ...*CPUSemaphore) *G1Jac { +func (p *G1Jac) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Jac, error) { // note: // each of the msmCX method is the same, except for the c constant it declares // duplicating (through template generation) these methods allows to declare the buckets on the stack @@ -177,98 +200,144 @@ func (p *G1Jac) MultiExp(points []G1Affine, scalars []fr.Element, opts ...*CPUSe // step 3 // reduce the buckets weigthed sums into our result (msmReduceChunk) - var opt *CPUSemaphore - if len(opts) > 0 { - opt = opts[0] - } else { - opt = NewCPUSemaphore(runtime.NumCPU()) + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") } - var C uint64 - nbPoints := len(points) + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } - // implemented msmC methods (the c we use must be in this slice) - implementedCs := []uint64{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21} - - // approximate cost (in group operations) - // cost = bits/c * (nbPoints + 2^{c}) - // this needs to be verified empirically. 
- // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results - min := math.MaxFloat64 - for _, c := range implementedCs { - cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) - cost := float64(cc) / float64(c) - if cost < min { - min = cost - C = c + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. + // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } } + // empirical, needs to be tuned. + // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C } - // empirical, needs to be tuned. 
- // if C > 16 && nbPoints < 1 << 23 { - // C = 16 - // } - - // take all the cpus to ourselves - opt.lock.Lock() + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } // partition the scalars // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window - scalars = partitionScalars(scalars, C) + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG1Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G1Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG1Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } - switch C { + msmInnerG1Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} + +func msmInnerG1Jac(p *G1Jac, c int, points []G1Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { case 4: - return p.msmC4(points, scalars, opt) + p.msmC4(points, scalars, splitFirstChunk) case 5: - return p.msmC5(points, scalars, opt) + p.msmC5(points, scalars, splitFirstChunk) case 6: - return p.msmC6(points, scalars, opt) + p.msmC6(points, scalars, splitFirstChunk) case 7: - return p.msmC7(points, scalars, opt) + p.msmC7(points, scalars, splitFirstChunk) case 8: - return p.msmC8(points, scalars, opt) + p.msmC8(points, scalars, splitFirstChunk) case 9: - return p.msmC9(points, scalars, opt) + p.msmC9(points, scalars, splitFirstChunk) case 10: - return p.msmC10(points, scalars, opt) + p.msmC10(points, scalars, splitFirstChunk) case 11: - return p.msmC11(points, scalars, opt) + p.msmC11(points, scalars, splitFirstChunk) case 12: - return p.msmC12(points, scalars, opt) + p.msmC12(points, scalars, splitFirstChunk) case 13: - return p.msmC13(points, scalars, opt) + p.msmC13(points, scalars, splitFirstChunk) case 14: - return p.msmC14(points, scalars, opt) + p.msmC14(points, scalars, splitFirstChunk) case 15: - return p.msmC15(points, scalars, opt) + p.msmC15(points, scalars, splitFirstChunk) case 16: - return p.msmC16(points, scalars, opt) + p.msmC16(points, scalars, splitFirstChunk) case 20: - return p.msmC20(points, scalars, opt) + p.msmC20(points, scalars, splitFirstChunk) case 21: - return 
p.msmC21(points, scalars, opt) + p.msmC21(points, scalars, splitFirstChunk) case 22: - return p.msmC22(points, scalars, opt) + p.msmC22(points, scalars, splitFirstChunk) default: - panic("unimplemented") + panic("not implemented") } } @@ -349,643 +418,832 @@ func msmProcessChunkG1Affine(chunk uint64, } chRes <- total - close(chRes) + } -func (p *G1Jac) msmC4(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 4 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar +func (p *G1Jac) msmC4(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance - // for each chunk, spawn a go routine that'll loop through all the scalars + // each go routine sends its result in chChunks[i] channel var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + 
chChunks[0] <- s1 + }() + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup - for chunk := nbChunks - 1; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC5(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 5 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC5(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate 
less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + 
s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC6(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 6 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC6(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 6 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan 
g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC7(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 7 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC7(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 7 // scalars partitioned into c-bit radixes + nbChunks = 
(fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + 
}(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC8(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 8 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar +func (p *G1Jac) msmC8(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) - // for each chunk, spawn a go routine that'll loop through all the scalars + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + 
} + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup - for chunk := nbChunks - 1; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC9(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 9 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC9(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 9 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + 
// note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + 
msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC10(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 10 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC10(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 10 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j 
uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC11(points []G1Affine, scalars 
[]fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 11 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC11(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 11 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << 
(c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC12(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 12 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC12(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 12 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is 
an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), 
chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC13(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 13 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC13(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 13 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, 
points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC14(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) 
*G1Jac { - const c = 14 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC14(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 14 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - 
msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC15(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 15 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC15(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 15 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on 
the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, 
points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC16(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 16 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar +func (p *G1Jac) msmC16(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) - // for each chunk, spawn a go routine that'll loop through all the scalars + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split 
:= len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup - for chunk := nbChunks - 1; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC20(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 20 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC20(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 20 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + 
for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, 
chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC21(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 21 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC21(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 21 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], 
c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } -func (p *G1Jac) msmC22(points []G1Affine, scalars []fr.Element, opt *CPUSemaphore) *G1Jac { - const c = 22 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all 
the scalars - var chChunks [nbChunks]chan g1JacExtended +func (p *G1Jac) msmC22(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 22 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G1Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g1JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g1JacExtended, points []G1Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g1JacExtended - msmProcessChunkG1Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to 
actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG1Affine(p, c, chChunks[:]) } // MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf -// optionally, takes as parameter a CPUSemaphore struct -// enabling to set max number of cpus to use -func (p *G2Affine) MultiExp(points []G2Affine, scalars []fr.Element, opts ...*CPUSemaphore) *G2Affine { +func (p *G2Affine) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Affine, error) { var _p G2Jac - _p.MultiExp(points, scalars, opts...) 
+ if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } p.FromJacobian(&_p) - return p + return p, nil } // MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf -// optionally, takes as parameter a CPUSemaphore struct -// enabling to set max number of cpus to use -func (p *G2Jac) MultiExp(points []G2Affine, scalars []fr.Element, opts ...*CPUSemaphore) *G2Jac { +func (p *G2Jac) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Jac, error) { // note: // each of the msmCX method is the same, except for the c constant it declares // duplicating (through template generation) these methods allows to declare the buckets on the stack @@ -1012,98 +1270,144 @@ func (p *G2Jac) MultiExp(points []G2Affine, scalars []fr.Element, opts ...*CPUSe // step 3 // reduce the buckets weigthed sums into our result (msmReduceChunk) - var opt *CPUSemaphore - if len(opts) > 0 { - opt = opts[0] - } else { - opt = NewCPUSemaphore(runtime.NumCPU()) + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") } - var C uint64 - nbPoints := len(points) + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } - // implemented msmC methods (the c we use must be in this slice) - implementedCs := []uint64{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 22} - - // approximate cost (in group operations) - // cost = bits/c * (nbPoints + 2^{c}) - // this needs to be verified empirically. 
- // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results - min := math.MaxFloat64 - for _, c := range implementedCs { - cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) - cost := float64(cc) / float64(c) - if cost < min { - min = cost - C = c + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 22} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. + // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } } + // empirical, needs to be tuned. + // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C } - // empirical, needs to be tuned. 
- // if C > 16 && nbPoints < 1 << 23 { - // C = 16 - // } - - // take all the cpus to ourselves - opt.lock.Lock() + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } // partition the scalars // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window - scalars = partitionScalars(scalars, C) + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG2Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G2Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG2Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } + + msmInnerG2Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} - switch C { +func msmInnerG2Jac(p *G2Jac, c int, points []G2Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { case 4: - return p.msmC4(points, scalars, opt) + p.msmC4(points, scalars, splitFirstChunk) case 5: - return p.msmC5(points, scalars, opt) + p.msmC5(points, scalars, splitFirstChunk) case 6: - return p.msmC6(points, scalars, opt) + p.msmC6(points, scalars, splitFirstChunk) case 7: - return p.msmC7(points, scalars, opt) + p.msmC7(points, scalars, splitFirstChunk) case 8: - return p.msmC8(points, scalars, opt) + p.msmC8(points, scalars, splitFirstChunk) case 9: - return p.msmC9(points, scalars, opt) + p.msmC9(points, scalars, splitFirstChunk) case 10: - return p.msmC10(points, scalars, opt) + p.msmC10(points, scalars, splitFirstChunk) case 11: - return p.msmC11(points, scalars, opt) + p.msmC11(points, scalars, splitFirstChunk) case 12: - return p.msmC12(points, scalars, opt) + p.msmC12(points, scalars, splitFirstChunk) case 13: - return p.msmC13(points, scalars, opt) + p.msmC13(points, scalars, splitFirstChunk) case 14: - return p.msmC14(points, scalars, opt) + p.msmC14(points, scalars, splitFirstChunk) case 15: - return p.msmC15(points, scalars, opt) + p.msmC15(points, scalars, splitFirstChunk) case 16: - return p.msmC16(points, scalars, opt) + p.msmC16(points, scalars, splitFirstChunk) case 20: - return p.msmC20(points, scalars, opt) + p.msmC20(points, scalars, splitFirstChunk) case 21: - return 
p.msmC21(points, scalars, opt) + p.msmC21(points, scalars, splitFirstChunk) case 22: - return p.msmC22(points, scalars, opt) + p.msmC22(points, scalars, splitFirstChunk) default: - panic("unimplemented") + panic("not implemented") } } @@ -1184,625 +1488,816 @@ func msmProcessChunkG2Affine(chunk uint64, } chRes <- total - close(chRes) + } -func (p *G2Jac) msmC4(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 4 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar +func (p *G2Jac) msmC4(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) - // for each chunk, spawn a go routine that'll loop through all the scalars + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + 
chChunks[0] <- s1 + }() + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup - for chunk := nbChunks - 1; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC5(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 5 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC5(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate 
less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + 
s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC6(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 6 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC6(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 6 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan 
g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC7(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 7 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC7(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 7 // scalars partitioned into c-bit radixes + nbChunks = 
(fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + 
}(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC8(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 8 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar +func (p *G2Jac) msmC8(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) - // for each chunk, spawn a go routine that'll loop through all the scalars + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + 
} + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup - for chunk := nbChunks - 1; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC9(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 9 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC9(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 9 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + 
// note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + 
msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC10(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 10 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC10(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 10 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j 
uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC11(points []G2Affine, scalars 
[]fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 11 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC11(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 11 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << 
(c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC12(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 12 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC12(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 12 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is 
an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), 
chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC13(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 13 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC13(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 13 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, 
points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC14(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) 
*G2Jac { - const c = 14 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC14(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 14 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - 
msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC15(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 15 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC15(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 15 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on 
the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, 
points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC16(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 16 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar +func (p *G2Jac) msmC16(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) - // for each chunk, spawn a go routine that'll loop through all the scalars + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split 
:= len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup - for chunk := nbChunks - 1; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC20(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 20 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC20(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 20 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + 
for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, 
chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC21(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 21 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC21(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 21 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], 
c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } -func (p *G2Jac) msmC22(points []G2Affine, scalars []fr.Element, opt *CPUSemaphore) *G2Jac { - const c = 22 // scalars partitioned into c-bit radixes - const nbChunks = (fr.Limbs * 64 / c) + 1 // number of c-bit radixes in a scalar - - // for each chunk, spawn a go routine that'll loop through all 
the scalars - var chChunks [nbChunks]chan g2JacExtended +func (p *G2Jac) msmC22(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 22 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } - // wait group to wait for all the go routines to start - var wg sync.WaitGroup // c doesn't divide 256, last window is smaller we can allocate less buckets const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) - chChunks[nbChunks-1] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() + go func(j uint64, points []G2Affine, scalars []fr.Element) { var buckets [1 << (lastC - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(nbChunks-1), chChunks[nbChunks-1], points, scalars) - - for chunk := nbChunks - 2; chunk >= 0; chunk-- { - chChunks[chunk] = make(chan g2JacExtended, 1) - <-opt.chCpus // wait to have a cpu before scheduling - wg.Add(1) - go func(j uint64, chRes chan g2JacExtended, points []G2Affine, scalars []fr.Element) { - wg.Done() - var buckets [1 << (c - 1)]g2JacExtended - msmProcessChunkG2Affine(j, chRes, buckets[:], c, points, scalars) - opt.chCpus <- struct{}{} // release token in the semaphore - }(uint64(chunk), chChunks[chunk], points, scalars) - } - - // wait for all goRoutines to 
actually start - wg.Wait() - - // all my tasks are scheduled, I can let other func use avaiable tokens in the semaphore - opt.lock.Unlock() + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + return msmReduceChunkG2Affine(p, c, chChunks[:]) } diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/pairing.go b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/pairing.go index c73918f7196..5d81cfd9444 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/bn254/pairing.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/bn254/pairing.go @@ -182,6 +182,8 @@ func MillerLoop(P []G1Affine, Q []G2Affine) (GT, error) { } var Q1, Q2 G2Affine + var l0 lineEvaluation + var tmp GT // cf https://eprint.iacr.org/2010/354.pdf for instance for optimal Ate Pairing for k := 0; k < n; k++ { //Q1 = Frob(Q) @@ -192,17 +194,15 @@ func MillerLoop(P []G1Affine, Q []G2Affine) (GT, error) { Q2.X.MulByNonResidue2Power2(&q[k].X) Q2.Y.MulByNonResidue2Power3(&q[k].Y).Neg(&Q2.Y) - qProj[k].AddMixedStep(&l, &Q1) - // line evaluation - l.r0.MulByElement(&l.r0, &p[k].Y) - l.r1.MulByElement(&l.r1, &p[k].X) - result.MulBy034(&l.r0, &l.r1, &l.r2) + qProj[k].AddMixedStep(&l0, &Q1) + l0.r0.MulByElement(&l0.r0, &p[k].Y) + 
l0.r1.MulByElement(&l0.r1, &p[k].X) qProj[k].AddMixedStep(&l, &Q2) - // line evaluation l.r0.MulByElement(&l.r0, &p[k].Y) l.r1.MulByElement(&l.r1, &p[k].X) - result.MulBy034(&l.r0, &l.r1, &l.r2) + tmp.Mul034by034(&l.r0, &l.r1, &l.r2, &l0.r0, &l0.r1, &l0.r2) + result.Mul(&result, &tmp) } return result, nil @@ -213,9 +213,9 @@ func MillerLoop(P []G1Affine, Q []G2Affine) (GT, error) { func (p *g2Proj) DoubleStep(evaluations *lineEvaluation) { // get some Element from our pool - var t0, t1, A, B, C, D, E, EE, F, G, H, I, J, K fptower.E2 - t0.Mul(&p.x, &p.y) - A.MulByElement(&t0, &twoInv) + var t1, A, B, C, D, E, EE, F, G, H, I, J, K fptower.E2 + A.Mul(&p.x, &p.y) + A.Halve() B.Square(&p.y) C.Square(&p.z) D.Double(&C). @@ -224,7 +224,7 @@ func (p *g2Proj) DoubleStep(evaluations *lineEvaluation) { F.Double(&E). Add(&F, &E) G.Add(&B, &F) - G.MulByElement(&G, &twoInv) + G.Halve() H.Add(&p.y, &p.z). Square(&H) t1.Add(&B, &C) diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/ecc.go b/vendor/github.com/consensys/gnark-crypto/ecc/ecc.go index f6afa3159c6..7f5e531c030 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/ecc.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/ecc.go @@ -14,23 +14,45 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package ecc is an elliptic curve (+pairing) library. -// Provides implementation for bls12-381, bls12-377, bn254, bw6-761 and their twisted edwards "companion curves" +// Package ecc provides bls12-381, bls12-377, bn254, bw6-761, bls24-315 and bw6-633 elliptic curves implementation (+pairing). 
+// +// Also +// +// * Multi exponentiation +// * FFT +// * Polynomial commitment schemes +// * MiMC +// * twisted edwards "companion curves" +// * EdDSA (on the "companion" twisted edwards curves) package ecc +import ( + "math/big" + + "github.com/consensys/gnark-crypto/internal/generator/config" +) + +// ID represent a unique ID for a curve +type ID uint16 + // do not modify the order of this enum const ( UNKNOWN ID = iota BN254 BLS12_377 BLS12_381 + BLS24_315 BW6_761 + BW6_633 ) -// ID represent a unique ID for a curve -type ID uint16 +// Implemented return the list of curves fully implemented in gnark-crypto +func Implemented() []ID { + return []ID{BN254, BLS12_377, BLS12_381, BW6_761, BLS24_315} +} func (id ID) String() string { + // TODO link with config.XXX.Name ? switch id { case BLS12_377: return "bls12_377" @@ -40,7 +62,59 @@ func (id ID) String() string { return "bn254" case BW6_761: return "bw6_761" + case BW6_633: + return "bw6_633" + case BLS24_315: + return "bls24_315" + default: + panic("unimplemented ecc ID") + } +} + +// Info returns constants related to a curve +func (id ID) Info() Info { + // note to avoid circular dependency these are hard coded + // values are checked for non regression in code generation + switch id { + case BLS12_377: + return newInfo(&config.BLS12_377) + case BLS12_381: + return newInfo(&config.BLS12_381) + case BN254: + return newInfo(&config.BN254) + case BW6_761: + return newInfo(&config.BW6_761) + case BW6_633: + return newInfo(&config.BW6_633) + case BLS24_315: + return newInfo(&config.BLS24_315) default: panic("unimplemented ecc ID") } } + +func newInfo(c *config.Curve) Info { + return Info{ + Fp: config.Field{ + Bits: c.FpInfo.Bits, + Bytes: c.FpInfo.Bytes, + Modulus: func() *big.Int { return new(big.Int).Set(c.FpInfo.Modulus()) }, + }, + Fr: config.Field{ + Bits: c.FrInfo.Bits, + Bytes: c.FrInfo.Bytes, + Modulus: func() *big.Int { return new(big.Int).Set(c.FrInfo.Modulus()) }, + }, + } +} + +// Info contains 
constants related to a curve +type Info struct { + Fp, Fr config.Field +} + +// MultiExpConfig enables to set optional configuration attribute to a call to MultiExp +type MultiExpConfig struct { + NbTasks int // go routines to be used in the multiexp. can be larger than num cpus. + ScalarsMont bool // indicates if the scalars are in montgommery form. Default to false. +} diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/ecc.md b/vendor/github.com/consensys/gnark-crypto/ecc/ecc.md index 11aec026988..b394e4d728a 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/ecc.md +++ b/vendor/github.com/consensys/gnark-crypto/ecc/ecc.md @@ -4,10 +4,11 @@ * BN254 (Ethereum) * BLS12-377 (ZEXE) * BW6-761 (EC supporting pairing on BLS12-377 field of definition) +* BLS24-315 +* BW6-633 (EC supporting pairing on BLS24-315 field of definition) ### Twisted edwards curves -Each of these curve has a `twistededwards` sub-package with its companion curve. Also known as [Jubjub](https://z.cash/technology/jubjub/) (BLS12-381) or [Baby-Jubjub](https://iden3-docs.readthedocs.io/en/latest/_downloads/33717d75ab84e11313cc0d8a090b636f/Baby-Jubjub.pdf) (BN254). +Each of these curve has a `twistededwards` sub-package with its companion curve. In particular, BLS12-381 comapnion curve is known as [Jubjub](https://z.cash/technology/jubjub/) and BN254's [Baby-Jubjub](https://iden3-docs.readthedocs.io/en/latest/_downloads/33717d75ab84e11313cc0d8a090b636f/Baby-Jubjub.pdf). They are of particular interest as they allow efficient elliptic curve cryptography inside zkSNARK circuits. 
- diff --git a/vendor/github.com/consensys/gnark-crypto/ecc/utils.go b/vendor/github.com/consensys/gnark-crypto/ecc/utils.go index 44ecf200c38..aca2d8e3fa9 100644 --- a/vendor/github.com/consensys/gnark-crypto/ecc/utils.go +++ b/vendor/github.com/consensys/gnark-crypto/ecc/utils.go @@ -4,6 +4,7 @@ import ( "crypto/sha256" "errors" "math/big" + "math/bits" ) //------------------------------------------------------- @@ -249,3 +250,19 @@ func ExpandMsgXmd(msg, dst []byte, lenInBytes int) ([]byte, error) { } return res, nil } + +// NextPowerOfTwo returns the next power of 2 of n +func NextPowerOfTwo(n uint64) uint64 { + c := bits.OnesCount64(n) + if c == 0 { + return 1 + } + if c == 1 { + return n + } + t := bits.LeadingZeros64(n) + if t == 0 { + panic("next power of 2 overflows uint64") + } + return uint64(1) << (64 - t) +} diff --git a/vendor/github.com/consensys/gnark-crypto/field/field.go b/vendor/github.com/consensys/gnark-crypto/field/field.go new file mode 100644 index 00000000000..5d521f9415a --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/field/field.go @@ -0,0 +1,305 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package field provides Golang code generation for efficient field arithmetic operations. +package field + +import ( + "errors" + "math/big" + + "github.com/consensys/gnark-crypto/field/internal/addchain" +) + +var ( + errUnsupportedModulus = errors.New("unsupported modulus. 
goff only works for prime modulus w/ size > 64bits") + errParseModulus = errors.New("can't parse modulus") +) + +// Field precomputed values used in template for code generation of field element APIs +type Field struct { + PackageName string + ElementName string + ModulusBig *big.Int + Modulus string + ModulusHex string + NbWords int + NbBits int + NbWordsLastIndex int + NbWordsIndexesNoZero []int + NbWordsIndexesFull []int + NbWordsIndexesNoLast []int + NbWordsIndexesNoZeroNoLast []int + P20InversionCorrectiveFac []uint64 + P20InversionNbIterations int + Q []uint64 + QInverse []uint64 + QMinusOneHalvedP []uint64 // ((q-1) / 2 ) + 1 + ASM bool + RSquare []uint64 + One []uint64 + LegendreExponent string // big.Int to base16 string + NoCarry bool + NoCarrySquare bool // used if NoCarry is set, but some op may overflow in square optimization + SqrtQ3Mod4 bool + SqrtAtkin bool + SqrtTonelliShanks bool + SqrtE uint64 + SqrtS []uint64 + SqrtAtkinExponent string // big.Int to base16 string + SqrtSMinusOneOver2 string // big.Int to base16 string + SqrtQ3Mod4Exponent string // big.Int to base16 string + SqrtG []uint64 // NonResidue ^ SqrtR (montgomery form) + NonResidue []uint64 // (montgomery form) + LegendreExponentData *addchain.AddChainData + SqrtAtkinExponentData *addchain.AddChainData + SqrtSMinusOneOver2Data *addchain.AddChainData + SqrtQ3Mod4ExponentData *addchain.AddChainData + UseAddChain bool +} + +// NewField returns a data structure with needed information to generate apis for field element +// +// See field/generator package +func NewField(packageName, elementName, modulus string, useAddChain bool) (*Field, error) { + // parse modulus + var bModulus big.Int + if _, ok := bModulus.SetString(modulus, 10); !ok { + return nil, errParseModulus + } + + // field info + F := &Field{ + PackageName: packageName, + ElementName: elementName, + Modulus: modulus, + ModulusHex: bModulus.Text(16), + ModulusBig: new(big.Int).Set(&bModulus), + UseAddChain: useAddChain, + } + // 
pre compute field constants + F.NbBits = bModulus.BitLen() + F.NbWords = len(bModulus.Bits()) + if F.NbWords < 2 { + return nil, errUnsupportedModulus + } + + F.NbWordsLastIndex = F.NbWords - 1 + + // set q from big int repr + F.Q = toUint64Slice(&bModulus) + _qHalved := big.NewInt(0) + bOne := new(big.Int).SetUint64(1) + _qHalved.Sub(&bModulus, bOne).Rsh(_qHalved, 1).Add(_qHalved, bOne) + F.QMinusOneHalvedP = toUint64Slice(_qHalved, F.NbWords) + + // setting qInverse + _r := big.NewInt(1) + _r.Lsh(_r, uint(F.NbWords)*64) + _rInv := big.NewInt(1) + _qInv := big.NewInt(0) + extendedEuclideanAlgo(_r, &bModulus, _rInv, _qInv) + _qInv.Mod(_qInv, _r) + F.QInverse = toUint64Slice(_qInv, F.NbWords) + + // Pornin20 inversion correction factors + k := 32 // Optimized for 64 bit machines, still works for 32 + + p20InvInnerLoopNbIterations := 2*F.NbBits - 1 + // if constant time inversion then p20InvInnerLoopNbIterations-- (among other changes) + F.P20InversionNbIterations = (p20InvInnerLoopNbIterations-1)/(k-1) + 1 // ⌈ (2 * field size - 1) / (k-1) ⌉ + F.P20InversionNbIterations += F.P20InversionNbIterations % 2 // "round up" to a multiple of 2 + + kLimbs := k * F.NbWords + p20InversionCorrectiveFacPower := kLimbs*6 + F.P20InversionNbIterations*(kLimbs-k+1) + p20InversionCorrectiveFac := big.NewInt(1) + p20InversionCorrectiveFac.Lsh(p20InversionCorrectiveFac, uint(p20InversionCorrectiveFacPower)) + p20InversionCorrectiveFac.Mod(p20InversionCorrectiveFac, &bModulus) + F.P20InversionCorrectiveFac = toUint64Slice(p20InversionCorrectiveFac, F.NbWords) + + // rsquare + _rSquare := big.NewInt(2) + exponent := big.NewInt(int64(F.NbWords) * 64 * 2) + _rSquare.Exp(_rSquare, exponent, &bModulus) + F.RSquare = toUint64Slice(_rSquare, F.NbWords) + + var one big.Int + one.SetUint64(1) + one.Lsh(&one, uint(F.NbWords)*64).Mod(&one, &bModulus) + F.One = toUint64Slice(&one, F.NbWords) + + // indexes (template helpers) + F.NbWordsIndexesFull = make([]int, F.NbWords) + F.NbWordsIndexesNoZero = 
make([]int, F.NbWords-1) + F.NbWordsIndexesNoLast = make([]int, F.NbWords-1) + F.NbWordsIndexesNoZeroNoLast = make([]int, F.NbWords-2) + for i := 0; i < F.NbWords; i++ { + F.NbWordsIndexesFull[i] = i + if i > 0 { + F.NbWordsIndexesNoZero[i-1] = i + } + if i != F.NbWords-1 { + F.NbWordsIndexesNoLast[i] = i + if i > 0 { + F.NbWordsIndexesNoZeroNoLast[i-1] = i + } + } + } + + // See https://hackmd.io/@gnark/modular_multiplication + // if the last word of the modulus is smaller or equal to B, + // we can simplify the montgomery multiplication + const B = (^uint64(0) >> 1) - 1 + F.NoCarry = (F.Q[len(F.Q)-1] <= B) && F.NbWords <= 12 + const BSquare = ^uint64(0) >> 2 + F.NoCarrySquare = F.Q[len(F.Q)-1] <= BSquare + + // Legendre exponent (p-1)/2 + var legendreExponent big.Int + legendreExponent.SetUint64(1) + legendreExponent.Sub(&bModulus, &legendreExponent) + legendreExponent.Rsh(&legendreExponent, 1) + F.LegendreExponent = legendreExponent.Text(16) + if F.UseAddChain { + F.LegendreExponentData = addchain.GetAddChain(&legendreExponent) + } + + // Sqrt pre computes + var qMod big.Int + qMod.SetUint64(4) + if qMod.Mod(&bModulus, &qMod).Cmp(new(big.Int).SetUint64(3)) == 0 { + // q ≡ 3 (mod 4) + // using z ≡ ± x^((p+1)/4) (mod q) + F.SqrtQ3Mod4 = true + var sqrtExponent big.Int + sqrtExponent.SetUint64(1) + sqrtExponent.Add(&bModulus, &sqrtExponent) + sqrtExponent.Rsh(&sqrtExponent, 2) + F.SqrtQ3Mod4Exponent = sqrtExponent.Text(16) + + // add chain stuff + if F.UseAddChain { + F.SqrtQ3Mod4ExponentData = addchain.GetAddChain(&sqrtExponent) + } + + } else { + // q ≡ 1 (mod 4) + qMod.SetUint64(8) + if qMod.Mod(&bModulus, &qMod).Cmp(new(big.Int).SetUint64(5)) == 0 { + // q ≡ 5 (mod 8) + // use Atkin's algorithm + // see modSqrt5Mod8Prime in math/big/int.go + F.SqrtAtkin = true + e := new(big.Int).Rsh(&bModulus, 3) // e = (q - 5) / 8 + F.SqrtAtkinExponent = e.Text(16) + if F.UseAddChain { + F.SqrtAtkinExponentData = addchain.GetAddChain(e) + } + } else { + // use Tonelli-Shanks 
+ F.SqrtTonelliShanks = true + + // Write q-1 =2ᵉ * s , s odd + var s big.Int + one.SetUint64(1) + s.Sub(&bModulus, &one) + + e := s.TrailingZeroBits() + s.Rsh(&s, e) + F.SqrtE = uint64(e) + F.SqrtS = toUint64Slice(&s) + + // find non residue + var nonResidue big.Int + nonResidue.SetInt64(2) + one.SetUint64(1) + for big.Jacobi(&nonResidue, &bModulus) != -1 { + nonResidue.Add(&nonResidue, &one) + } + + // g = nonresidue ^ s + var g big.Int + g.Exp(&nonResidue, &s, &bModulus) + // store g in montgomery form + g.Lsh(&g, uint(F.NbWords)*64).Mod(&g, &bModulus) + F.SqrtG = toUint64Slice(&g, F.NbWords) + + // store non residue in montgomery form + nonResidue.Lsh(&nonResidue, uint(F.NbWords)*64).Mod(&nonResidue, &bModulus) + F.NonResidue = toUint64Slice(&nonResidue) + + // (s+1) /2 + s.Sub(&s, &one).Rsh(&s, 1) + F.SqrtSMinusOneOver2 = s.Text(16) + + if F.UseAddChain { + F.SqrtSMinusOneOver2Data = addchain.GetAddChain(&s) + } + } + } + + // note: to simplify output files generated, we generated ASM code only for + // moduli that meet the condition F.NoCarry + // asm code generation for moduli with more than 6 words can be optimized further + F.ASM = F.NoCarry && F.NbWords <= 12 + + return F, nil +} + +func toUint64Slice(b *big.Int, nbWords ...int) (s []uint64) { + if len(nbWords) > 0 && nbWords[0] > len(b.Bits()) { + s = make([]uint64, nbWords[0]) + } else { + s = make([]uint64, len(b.Bits())) + } + + for i, v := range b.Bits() { + s[i] = (uint64)(v) + } + return +} + +// https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm +// r > q, modifies rinv and qinv such that rinv.r - qinv.q = 1 +func extendedEuclideanAlgo(r, q, rInv, qInv *big.Int) { + var s1, s2, t1, t2, qi, tmpMuls, riPlusOne, tmpMult, a, b big.Int + t1.SetUint64(1) + rInv.Set(big.NewInt(1)) + qInv.Set(big.NewInt(0)) + a.Set(r) + b.Set(q) + + // r_i+1 = r_i-1 - q_i.r_i + // s_i+1 = s_i-1 - q_i.s_i + // t_i+1 = t_i-1 - q_i.s_i + for b.Sign() > 0 { + qi.Div(&a, &b) + riPlusOne.Mod(&a, &b) + + tmpMuls.Mul(&s1, 
&qi) + tmpMult.Mul(&t1, &qi) + + s2.Set(&s1) + t2.Set(&t1) + + s1.Sub(rInv, &tmpMuls) + t1.Sub(qInv, &tmpMult) + rInv.Set(&s2) + qInv.Set(&t2) + + a.Set(&b) + b.Set(&riPlusOne) + } + qInv.Neg(qInv) +} diff --git a/vendor/github.com/consensys/gnark-crypto/field/field.md b/vendor/github.com/consensys/gnark-crypto/field/field.md new file mode 100644 index 00000000000..4e9648ceb83 --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/field/field.md @@ -0,0 +1,48 @@ + +# Usage + +At the root of your repo: +```bash +go get github.com/consensys/gnark-crypto/field +``` + +then in a `main.go` (that can be called using a `go:generate` workflow): + +``` +generator.GenerateFF(packageName, structName, modulus, destinationPath, false) +``` + +The generated type has an API that's similar with `big.Int` + +Example API signature +```go +// Mul z = x * y mod q +func (z *Element) Mul(x, y *Element) *Element +``` + +and can be used like so: + +```go +var a, b Element +a.SetUint64(2) +b.SetString("984896738") + +a.Mul(a, b) + +a.Sub(a, a) + .Add(a, b) + .Inv(a) + +b.Exp(b, 42) +b.Neg(b) +``` + +### Build tags + +Generates optimized assembly for `amd64` target. + +For the `Mul` operation, using `ADX` instructions and `ADOX/ADCX` result in a significant performance gain. + +The "default" target `amd64` checks if the running architecture supports these instruction, and reverts to generic path if not. This check adds a branch and forces the function to reserve some bytes on the frame to store the argument to call `_mulGeneric` . + +This package outputs code that can be compiled with `amd64_adx` flag which omits this check. Will crash if the platform running the binary doesn't support the `ADX` instructions (roughly, before 2016). 
\ No newline at end of file diff --git a/vendor/github.com/consensys/gnark-crypto/field/internal/addchain/addchain.go b/vendor/github.com/consensys/gnark-crypto/field/internal/addchain/addchain.go new file mode 100644 index 00000000000..408c26cf759 --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/field/internal/addchain/addchain.go @@ -0,0 +1,327 @@ +// Original copyright : +// BSD 3-Clause License + +// Copyright (c) 2019, Michael McLoughlin +// All rights reserved. + +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: + +// 1. Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. + +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. + +// 3. Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. + +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Package addchain is derived from github.com/mmcloughlin/addchain internal packages or examples +package addchain + +import ( + "bufio" + "encoding/gob" + "log" + "math/big" + "os" + "path/filepath" + "reflect" + "strings" + "sync" + + "github.com/mmcloughlin/addchain" + "github.com/mmcloughlin/addchain/acc" + "github.com/mmcloughlin/addchain/acc/ast" + "github.com/mmcloughlin/addchain/acc/ir" + "github.com/mmcloughlin/addchain/acc/pass" + "github.com/mmcloughlin/addchain/acc/printer" + "github.com/mmcloughlin/addchain/alg/ensemble" + "github.com/mmcloughlin/addchain/alg/exec" + "github.com/mmcloughlin/addchain/meta" +) + +// most of these functions are derived from github.com/mmcloughlin/addchain internal packages or examples + +var ( + once sync.Once + addChainDir string + mAddchains map[string]*AddChainData // key is big.Int.Text(16) +) + +// GetAddChain retunrs template data of a short addition chain for given big.Int +func GetAddChain(n *big.Int) *AddChainData { + + // init the cache only once. + once.Do(initCache) + + key := n.Text(16) + if r, ok := mAddchains[key]; ok { + return r + } + + // Default ensemble of algorithms. + algorithms := ensemble.Ensemble() + + // Use parallel executor. + ex := exec.NewParallel() + results := ex.Execute(n, algorithms) + + // Output best result. 
+ best := 0 + for i, r := range results { + if r.Err != nil { + log.Fatal(r.Err) + } + if len(results[i].Program) < len(results[best].Program) { + best = i + } + } + r := results[best] + data := processSearchResult(r.Program, key) + + mAddchains[key] = data + // gob encode + file := filepath.Join(addChainDir, key) + log.Println("saving addchain", file) + f, err := os.Create(file) + if err != nil { + log.Fatal(err) + } + enc := gob.NewEncoder(f) + + if err := enc.Encode(r.Program); err != nil { + _ = f.Close() + log.Fatal(err) + } + _ = f.Close() + + return data +} + +func processSearchResult(_p addchain.Program, n string) *AddChainData { + p, err := acc.Decompile(_p) + if err != nil { + log.Fatal(err) + } + chain, err := acc.Build(p) + if err != nil { + log.Fatal(err) + } + + data, err := prepareAddChainData(chain, n) + if err != nil { + log.Fatal(err) + } + return data +} + +// Data provided to templates. +type AddChainData struct { + // Chain is the addition chain as a list of integers. + Chain addchain.Chain + + // Ops is the complete sequence of addition operations required to compute + // the addition chain. + Ops addchain.Program + + // Script is the condensed representation of the addition chain computation + // in the "addition chain calculator" language. + Script *ast.Chain + + // Program is the intermediate representation of the addition chain + // computation. This representation is likely the most convenient for code + // generation. It contains a sequence of add, double and shift (repeated + // doubling) instructions required to compute the chain. Temporary variable + // allocation has been performed and the list of required temporaries + // populated. + Program *ir.Program + + // Metadata about the addchain project and the specific release parameters. + // Please use this to include a reference or citation back to the addchain + // project in your generated output. 
+ Meta *meta.Properties + + N string // base 16 value of the value +} + +// PrepareData builds input template data for the given addition chain script. +func prepareAddChainData(s *ast.Chain, n string) (*AddChainData, error) { + // Prepare template data. + allocator := pass.Allocator{ + Input: "x", + Output: "z", + Format: "t%d", + } + // Translate to IR. + p, err := acc.Translate(s) + if err != nil { + return nil, err + } + + // Apply processing passes: temporary variable allocation, and computing the + // full addition chain sequence and operations. + if err := pass.Exec(p, allocator, pass.Func(pass.Eval)); err != nil { + return nil, err + } + + return &AddChainData{ + Chain: p.Chain, + Ops: p.Program, + Script: s, + Program: p, + Meta: meta.Meta, + N: n, + }, nil +} + +// Function is a function provided to templates. +type Function struct { + Name string + Description string + Func interface{} +} + +// Signature returns the function signature. +func (f *Function) Signature() string { + return reflect.ValueOf(f.Func).Type().String() +} + +// Functions is the list of functions provided to templates. 
+var Functions = []*Function{ + { + Name: "add_", + Description: "If the input operation is an `ir.Add` then return it, otherwise return `nil`", + Func: func(op ir.Op) ir.Op { + if a, ok := op.(ir.Add); ok { + return a + } + return nil + }, + }, + { + Name: "double_", + Description: "If the input operation is an `ir.Double` then return it, otherwise return `nil`", + Func: func(op ir.Op) ir.Op { + if d, ok := op.(ir.Double); ok { + return d + } + return nil + }, + }, + { + Name: "shift_", + Description: "If the input operation is an `ir.Shift` then return it, otherwise return `nil`", + Func: func(op ir.Op) ir.Op { + if s, ok := op.(ir.Shift); ok { + return s + } + return nil + }, + }, + { + Name: "inc_", + Description: "Increment an integer", + Func: func(n int) int { return n + 1 }, + }, + { + Name: "format_", + Description: "Formats an addition chain script (`*ast.Chain`) as a string", + Func: printer.String, + }, + { + Name: "split_", + Description: "Calls `strings.Split`", + Func: strings.Split, + }, + { + Name: "join_", + Description: "Calls `strings.Join`", + Func: strings.Join, + }, + { + Name: "lines_", + Description: "Split input string into lines", + Func: func(s string) []string { + var lines []string + scanner := bufio.NewScanner(strings.NewReader(s)) + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + return lines + }, + }, + { + Name: "ptr_", + Description: "adds & if it's a value", + Func: func(s *ir.Operand) string { + if s.String() == "x" { + return "&" + } + return "" + }, + }, + { + Name: "last_", + Func: func(x int, a interface{}) bool { + return x == reflect.ValueOf(a).Len()-1 + }, + }, +} + +// to speed up code generation, we cache addchain search results on disk +func initCache() { + mAddchains = make(map[string]*AddChainData) + + // read existing files in addchain directory + path, err := os.Getwd() + if err != nil { + log.Fatal(err) + } + addChainDir = filepath.Join(path, "addchain") + _ = os.Mkdir(addChainDir, 0700) + 
files, err := os.ReadDir(addChainDir) + if err != nil { + log.Fatal(err) + } + + // preload pre-computed add chains + for _, entry := range files { + if entry.IsDir() { + continue + } + f, err := os.Open(filepath.Join(addChainDir, entry.Name())) + if err != nil { + log.Fatal(err) + } + + // decode the addchain.Program + dec := gob.NewDecoder(f) + var p addchain.Program + err = dec.Decode(&p) + _ = f.Close() + if err != nil { + log.Fatal(err) + } + data := processSearchResult(p, filepath.Base(f.Name())) + log.Println("read", filepath.Base(f.Name())) + + // save the data + mAddchains[filepath.Base(f.Name())] = data + + } + +} diff --git a/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls12-377.go b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls12-377.go new file mode 100644 index 00000000000..4416e2a60ea --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls12-377.go @@ -0,0 +1,29 @@ +package config + +var BLS12_377 = Curve{ + Name: "bls12-377", + CurvePackage: "bls12377", + EnumID: "BLS12_377", + FrModulus: "8444461749428370424248824938781546531375899335154063827935233455917409239041", + FpModulus: "258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177", + G1: Point{ + CoordType: "fp.Element", + PointName: "g1", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + }, + G2: Point{ + CoordType: "fptower.E2", + PointName: "g2", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + Projective: true, + }, +} + +func init() { + addCurve(&BLS12_377) + +} diff --git a/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls12-381.go b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls12-381.go new file mode 100644 index 00000000000..986b28e472f --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls12-381.go @@ -0,0 +1,29 @@ +package 
config + +var BLS12_381 = Curve{ + Name: "bls12-381", + CurvePackage: "bls12381", + EnumID: "BLS12_381", + FrModulus: "52435875175126190479447740508185965837690552500527637822603658699938581184513", + FpModulus: "4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787", + G1: Point{ + CoordType: "fp.Element", + PointName: "g1", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + }, + G2: Point{ + CoordType: "fptower.E2", + PointName: "g2", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + Projective: true, + }, +} + +func init() { + addCurve(&BLS12_381) + +} diff --git a/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls24-315.go b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls24-315.go new file mode 100644 index 00000000000..0c07686195f --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bls24-315.go @@ -0,0 +1,29 @@ +package config + +var BLS24_315 = Curve{ + Name: "bls24-315", + CurvePackage: "bls24315", + EnumID: "BLS24_315", + FrModulus: "11502027791375260645628074404575422495959608200132055716665986169834464870401", + FpModulus: "39705142709513438335025689890408969744933502416914749335064285505637884093126342347073617133569", + G1: Point{ + CoordType: "fp.Element", + PointName: "g1", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + }, + G2: Point{ + CoordType: "fptower.E4", + PointName: "g2", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + Projective: true, + }, +} + +func init() { + addCurve(&BLS24_315) + +} diff --git a/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bn254.go b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bn254.go new file mode 100644 index 00000000000..2b43f717a60 --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bn254.go @@ -0,0 +1,28 @@ +package config 
+ +var BN254 = Curve{ + Name: "bn254", + CurvePackage: "bn254", + EnumID: "BN254", + FrModulus: "21888242871839275222246405745257275088548364400416034343698204186575808495617", + FpModulus: "21888242871839275222246405745257275088696311157297823662689037894645226208583", + G1: Point{ + CoordType: "fp.Element", + PointName: "g1", + GLV: true, + CofactorCleaning: false, + CRange: defaultCRange(), + }, + G2: Point{ + CoordType: "fptower.E2", + PointName: "g2", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + Projective: true, + }, +} + +func init() { + addCurve(&BN254) +} diff --git a/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bw6-633.go b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bw6-633.go new file mode 100644 index 00000000000..9ff8f9db83e --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bw6-633.go @@ -0,0 +1,28 @@ +package config + +var BW6_633 = Curve{ + Name: "bw6-633", + CurvePackage: "bw6633", + EnumID: "BW6_633", + FrModulus: "39705142709513438335025689890408969744933502416914749335064285505637884093126342347073617133569", + FpModulus: "20494478644167774678813387386538961497669590920908778075528754551012016751717791778743535050360001387419576570244406805463255765034468441182772056330021723098661967429339971741066259394985997", + G1: Point{ + CoordType: "fp.Element", + PointName: "g1", + GLV: true, + CofactorCleaning: true, + CRange: []int{4, 5, 8, 16}, + Projective: true, + }, + G2: Point{ + CoordType: "fp.Element", + PointName: "g2", + GLV: true, + CofactorCleaning: true, + CRange: []int{4, 5, 8, 16}, + }, +} + +func init() { + addCurve(&BW6_633) +} diff --git a/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bw6-761.go b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bw6-761.go new file mode 100644 index 00000000000..9f10c63f6d4 --- /dev/null +++ 
b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/bw6-761.go @@ -0,0 +1,28 @@ +package config + +var BW6_761 = Curve{ + Name: "bw6-761", + CurvePackage: "bw6761", + EnumID: "BW6_761", + FrModulus: "258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177", + FpModulus: "6891450384315732539396789682275657542479668912536150109513790160209623422243491736087683183289411687640864567753786613451161759120554247759349511699125301598951605099378508850372543631423596795951899700429969112842764913119068299", + G1: Point{ + CoordType: "fp.Element", + PointName: "g1", + GLV: true, + CofactorCleaning: true, + CRange: []int{4, 5, 8, 16}, + Projective: true, + }, + G2: Point{ + CoordType: "fp.Element", + PointName: "g2", + GLV: true, + CofactorCleaning: true, + CRange: []int{4, 5, 8, 16}, + }, +} + +func init() { + addCurve(&BW6_761) +} diff --git a/vendor/github.com/consensys/gnark-crypto/internal/generator/config/curve.go b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/curve.go new file mode 100644 index 00000000000..18c079a90f1 --- /dev/null +++ b/vendor/github.com/consensys/gnark-crypto/internal/generator/config/curve.go @@ -0,0 +1,71 @@ +package config + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/field" +) + +// Curve describes parameters of the curve useful for the template +type Curve struct { + Name string + CurvePackage string + Package string // current package being generated + EnumID string + FpModulus string + FrModulus string + + Fp *field.Field + Fr *field.Field + FpUnusedBits int + + FpInfo, FrInfo Field + G1 Point + G2 Point +} + +type Field struct { + Bits int + Bytes int + Modulus func() *big.Int +} + +func (c Curve) Equal(other Curve) bool { + return c.Name == other.Name +} + +type Point struct { + CoordType string + PointName string + GLV bool // scalar mulitplication using GLV + CofactorCleaning bool // flag telling if the Cofactor cleaning is 
available + CRange []int // multiexp bucket method: generate inner methods (with const arrays) for each c + Projective bool // generate projective coordinates +} + +var Curves []Curve + +func defaultCRange() []int { + // default range for C values in the multiExp + return []int{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 22} +} + +func addCurve(c *Curve) { + // init FpInfo and FrInfo + c.FpInfo = newFieldInfo(c.FpModulus) + c.FrInfo = newFieldInfo(c.FrModulus) + Curves = append(Curves, *c) +} + +func newFieldInfo(modulus string) Field { + var F Field + var bModulus big.Int + if _, ok := bModulus.SetString(modulus, 10); !ok { + panic("invalid modulus " + modulus) + } + + F.Bits = bModulus.BitLen() + F.Bytes = len(bModulus.Bits()) * 8 + F.Modulus = func() *big.Int { return new(big.Int).Set(&bModulus) } + return F +} diff --git a/vendor/github.com/mmcloughlin/addchain/.gitignore b/vendor/github.com/mmcloughlin/addchain/.gitignore new file mode 100644 index 00000000000..849ddff3b7e --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/.gitignore @@ -0,0 +1 @@ +dist/ diff --git a/vendor/github.com/mmcloughlin/addchain/.golangci.yml b/vendor/github.com/mmcloughlin/addchain/.golangci.yml new file mode 100644 index 00000000000..1a654ae7438 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/.golangci.yml @@ -0,0 +1,39 @@ +linters: + enable-all: true + disable: + - cyclop + - exhaustivestruct + - forbidigo + - funlen + - gochecknoglobals + - gocognit + - goerr113 + - gomnd + - ifshort + - interfacer + - lll + - maligned + - nlreturn + - paralleltest + - prealloc + - predeclared + - revive + - testpackage + - thelper + - wastedassign + - wrapcheck + - wsl + +linters-settings: + gci: + local-prefixes: github.com/mmcloughlin/addchain + +issues: + exclude-use-default: false + exclude: + # gosec: G304: Potential file inclusion via variable. 
+ - G304 + # gosec: G306: Expect WriteFile permissions to be 0600 or less + - G306 + # gosec: G404: Use of weak random number generator + - G404 diff --git a/vendor/github.com/mmcloughlin/addchain/.goreleaser.yml b/vendor/github.com/mmcloughlin/addchain/.goreleaser.yml new file mode 100644 index 00000000000..4c1df74be2f --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/.goreleaser.yml @@ -0,0 +1,28 @@ +before: + hooks: + - go mod tidy +builds: + - main: ./cmd/addchain + env: + - CGO_ENABLED=0 + goos: + - linux + - windows + - darwin + goarch: + - amd64 + - arm64 + ldflags: + - -s -w + - -X github.com/mmcloughlin/addchain/meta.buildversion={{ .Version }} +archives: + - format_overrides: + - goos: windows + format: zip + files: + - LICENSE* + - CITATION* + - README* +release: + draft: true + prerelease: auto diff --git a/vendor/github.com/mmcloughlin/addchain/.zenodo.json b/vendor/github.com/mmcloughlin/addchain/.zenodo.json new file mode 100644 index 00000000000..4501f173c4a --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/.zenodo.json @@ -0,0 +1,50 @@ +{ + "title": "mmcloughlin/addchain: v0.4.0", + "description": "Cryptographic Addition Chain Generation in Go", + "version": "0.4.0", + "publication_date": "2021-10-30", + "upload_type": "software", + "access_right": "open", + "license": "BSD-3-Clause", + "creators": [ + { + "name": "McLoughlin, Michael Ben", + "orcid": "0000-0003-2347-6258" + } + ], + "related_identifiers": [ + { + "identifier": "https://github.com/mmcloughlin/addchain/tree/v0.4.0", + "relation": "isSupplementTo", + "scheme": "url" + } + ], + "references": [ + "Adamu Muhammad Noma, Abdullah Muhammed, Mohamad Afendee Mohamed and Zuriati Ahmad Zulkarnain. A Review on Heuristics for Addition Chain Problem: Towards Efficient Public Key Cryptosystems. Journal of Computer Science. 2017. https://thescipub.com/pdf/10.3844/jcssp.2017.275.289.pdf", + "Diego F. Aranha, Paulo S. L. M. Barreto, Geovandro C. C. F. Pereira and Jefferson E. 
Ricardini. A note on high-security general-purpose elliptic curves. Cryptology ePrint Archive, Report 2013/647. 2013. https://eprint.iacr.org/2013/647", + "Bos, Jurjen and Coster, Matthijs. Addition Chain Heuristics. In Advances in Cryptology --- CRYPTO' 89 Proceedings, pages 400--407. 1990. https://link.springer.com/content/pdf/10.1007/0-387-34805-0_37.pdf", + "Martin Otto. Brauer addition-subtraction chains. PhD thesis, Universitat Paderborn. 2001. http://www.martin-otto.de/publications/docs/2001_MartinOtto_Diplom_BrauerAddition-SubtractionChains.pdf", + "F Bergeron, J Berstel, S Brlek and C Duboc. Addition chains using continued fractions. Journal of Algorithms. 1989. http://www-igm.univ-mlv.fr/~berstel/Articles/1989AdditionChainDuboc.pdf", + "Richard E. Crandall. Method and apparatus for public key exchange in a cryptographic system. US Patent 5,159,632. 1992. https://patents.google.com/patent/US5159632A", + "Bernstein, Daniel J. Curve25519: New Diffie-Hellman Speed Records. In Public Key Cryptography - PKC 2006, pages 207--228. 2006. https://cr.yp.to/ecdh/curve25519-20060209.pdf", + "Brian Smith. The Most Efficient Known Addition Chains for Field Element and Scalar Inversion for the Most Popular and Most Unpopular Elliptic Curves. 2017. https://briansmith.org/ecc-inversion-addition-chains-01 (accessed June 30, 2019)", + "Bergeron, F., Berstel, J. and Brlek, S. Efficient computation of addition chains. Journal de theorie des nombres de Bordeaux. 1994. http://www.numdam.org/item/JTNB_1994__6_1_21_0", + "Daniel J. Bernstein, Mike Hamburg, Anna Krasnova and Tanja Lange. Elligator: Elliptic-curve points indistinguishable from uniform random strings. Cryptology ePrint Archive, Report 2013/325. 2013. https://eprint.iacr.org/2013/325", + "NIST. Digital Signature Standard (DSS). Federal Information Processing Standards Publication 186-2. 2000. 
https://csrc.nist.gov/csrc/media/publications/fips/186/2/archive/2000-01-27/documents/fips186-2.pdf", + "Amadou Tall and Ali Yassin Sanghare. Efficient computation of addition-subtraction chains using generalized continued Fractions. Cryptology ePrint Archive, Report 2013/466. 2013. https://eprint.iacr.org/2013/466", + "Kunihiro, Noboru and Yamamoto, Hirosuke. New Methods for Generating Short Addition Chains. IEICE Transactions on Fundamentals of Electronics Communications and Computer Sciences. 2000. https://pdfs.semanticscholar.org/b398/d10faca35af9ce5a6026458b251fd0a5640c.pdf", + "Riad S. Wahby. kwantam/addchain. Github Repository. Apache License, Version 2.0. 2018. https://github.com/kwantam/addchain", + "Mike Hamburg. Ed448-Goldilocks, a new elliptic curve. Cryptology ePrint Archive, Report 2015/625. 2015. https://eprint.iacr.org/2015/625", + "Riad S. Wahby and Dan Boneh. Fast and simple constant-time hashing to the BLS12-381 elliptic curve. Cryptology ePrint Archive, Report 2019/403. 2019. https://eprint.iacr.org/2019/403", + "Christophe Doche. Exponentiation. Handbook of Elliptic and Hyperelliptic Curve Cryptography, chapter 9. 2006. http://koclab.cs.ucsb.edu/teaching/ecc/eccPapers/Doche-ch09.pdf", + "Knuth, Donald E. Evaluation of Powers. The Art of Computer Programming, Volume 2 (Third Edition): Seminumerical Algorithms, chapter 4.6.3. 1997. https://www-cs-faculty.stanford.edu/~knuth/taocp.html", + "Ayan Nandy. Modifications of Bos and Coster’s Heuristics in search of a shorter addition chain for faster exponentiation. Masters thesis, Indian Statistical Institute Kolkata. 2011. http://library.isical.ac.in:8080/jspui/bitstream/10263/6441/1/DISS-285.pdf", + "F. L. Ţiplea, S. Iftene, C. Hriţcu, I. Goriac, R. Gordân and E. Erbiceanu. MpNT: A Multi-Precision Number Theory Package, Number Theoretical Algorithms (I). Technical Report TR03-02, Faculty of Computer Science, \"Alexandru Ioan Cuza\" University, Iasi. 2003. 
https://profs.info.uaic.ro/~tr/tr03-02.pdf", + "Daniel J. Bernstein and Tanja Lange. Security dangers of the NIST curves. 2013. https://cr.yp.to/talks/2013.09.16/slides-djb-20130916-a4.pdf", + "Michael Scott, Naomi Benger, Manuel Charlemagne, Luis J. Dominguez Perez and Ezekiel J. Kachisa. On the final exponentiation for calculating pairings on ordinary elliptic curves. Cryptology ePrint Archive, Report 2008/490. 2008. https://eprint.iacr.org/2008/490", + "Daniel J. Bernstein and Tanja Lange. SafeCurves: choosing safe curves for elliptic-curve cryptography. https://safecurves.cr.yp.to", + "Certicom Research. SEC 2: Recommended Elliptic Curve Domain Parameters, Version 2.0. Standards for Efficient Cryptography 2. 2010. https://safecurves.cr.yp.to/www.secg.org/sec2-v2.pdf", + "Jerome A. Solinas. Generalized Mersenne Primes. Technical Report CORR 99-39, Centre for Applied Cryptographic Research (CACR) at the University of Waterloo. 1999. http://cacr.uwaterloo.ca/techreports/1999/corr99-39.pdf", + "Stam, Martijn. Speeding up subgroup cryptosystems. PhD thesis, Technische Universiteit Eindhoven. 2003. https://cr.yp.to/bib/2003/stam-thesis.pdf" + ] +} diff --git a/vendor/github.com/mmcloughlin/addchain/CITATION.bib b/vendor/github.com/mmcloughlin/addchain/CITATION.bib new file mode 100644 index 00000000000..4bddef61f5d --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/CITATION.bib @@ -0,0 +1,11 @@ +@misc{addchain, + title = {addchain: Cryptographic Addition Chain Generation in Go}, + author = {Michael B. 
McLoughlin}, + year = 2021, + month = oct, + howpublished = {Repository \url{https://github.com/mmcloughlin/addchain}}, + version = {0.4.0}, + license = {BSD 3-Clause License}, + doi = {10.5281/zenodo.5622943}, + url = {https://doi.org/10.5281/zenodo.5622943}, +} diff --git a/vendor/github.com/mmcloughlin/addchain/CITATION.cff b/vendor/github.com/mmcloughlin/addchain/CITATION.cff new file mode 100644 index 00000000000..252327d5ee3 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/CITATION.cff @@ -0,0 +1,19 @@ +cff-version: 1.2.0 +message: "If you use addchain in your work, a citation would be appreciated using the following metadata." +title: "addchain: Cryptographic Addition Chain Generation in Go" +authors: + - family-names: "McLoughlin" + given-names: "Michael Ben" + orcid: "https://orcid.org/0000-0003-2347-6258" +version: "0.4.0" +date-released: "2021-10-30" +license: BSD-3-Clause +repository-code: https://github.com/mmcloughlin/addchain +doi: "10.5281/zenodo.5622943" +identifiers: + - type: doi + value: "10.5281/zenodo.4625263" + description: "The concept DOI of the work." + - type: doi + value: "10.5281/zenodo.5622943" + description: "The versioned DOI for version 0.4.0 of the work." diff --git a/vendor/github.com/mmcloughlin/addchain/LICENSE b/vendor/github.com/mmcloughlin/addchain/LICENSE new file mode 100644 index 00000000000..bc3d0d51028 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2019, Michael McLoughlin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/mmcloughlin/addchain/README.md b/vendor/github.com/mmcloughlin/addchain/README.md new file mode 100644 index 00000000000..6a1664df09f --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/README.md @@ -0,0 +1,402 @@ +

+ addchain +
+ Build Status + go.dev + Go Report Card + DOI: 10.5281/zenodo.5622943 +

+ +

Cryptographic Addition Chain Generation in Go

+ +`addchain` generates short addition chains for exponents of cryptographic +interest with [results](#results) rivaling the best hand-optimized chains. +Intended as a building block in elliptic curve or other cryptographic code +generators. + +* Suite of algorithms from academic research: continued fractions, + dictionary-based and Bos-Coster heuristics +* Custom run-length techniques exploit structure of cryptographic exponents + with excellent results on Solinas primes +* Generic optimization methods eliminate redundant operations +* Simple domain-specific language for addition chain computations +* Command-line interface or library +* Code generation and templated output support + +## Table of Contents + +* [Background](#background) +* [Results](#results) +* [Usage](#usage) + * [Command-line Interface](#command-line-interface) + * [Library](#library) +* [Algorithms](#algorithms) + * [Binary](#binary) + * [Continued Fractions](#continued-fractions) + * [Bos-Coster Heuristics](#bos-coster-heuristics) + * [Dictionary](#dictionary) + * [Runs](#runs) + * [Optimization](#optimization) +* [Citing](#citing) +* [Thanks](#thanks) +* [Contributing](#contributing) +* [License](#license) + + +## Background + +An [_addition chain_](https://en.wikipedia.org/wiki/Addition_chain) for a +target integer _n_ is a sequence of numbers starting at 1 and ending at _n_ +such that every term is a sum of two numbers appearing earlier in the +sequence. For example, an addition chain for 29 is + +``` +1, 2, 4, 8, 9, 17, 25, 29 +``` + +Addition chains arise in the optimization of exponentiation algorithms with +fixed exponents. For example, the addition chain above corresponds to the +following sequence of multiplications to compute x29 + +
+ x2 = x1 * x1
+ x4 = x2 * x2
+ x8 = x4 * x4
+ x9 = x1 * x8
+x17 = x8 * x9
+x25 = x8 * x17
+x29 = x4 * x25
+
+ +An exponentiation algorithm for a fixed exponent _n_ reduces to finding a +_minimal length addition chain_ for _n_. This is especially relevent in +cryptography where exponentiation by huge fixed exponents forms a +performance-critical component of finite-field arithmetic. In particular, +constant-time inversion modulo a prime _p_ is performed by computing +xp-2 (mod p), thanks to [Fermat's Little +Theorem](https://en.wikipedia.org/wiki/Fermat%27_little_theorem). Square root +also reduces to exponentiation for some prime moduli. Finding short addition +chains for these exponents is one important part of high-performance finite +field implementations required for elliptic curve cryptography or RSA. + +Minimal addition chain search is famously hard. No practical optimal +algorithm is known, especially for cryptographic exponents of size 256-bits +and up. Given its importance for the performance of cryptographic +implementations, implementers devote significant effort to hand-tune addition +chains. The goal of the `addchain` project is to match or exceed the best +hand-optimized addition chains using entirely automated approaches, building +on extensive academic research and applying new tweaks that exploit the +unique nature of cryptographic exponents. + +## Results + +The following table shows the results of the `addchain` library on popular +cryptographic exponents. For each one we also show the length of the [best +known hand-optimized addition chain](https://briansmith.org/ecc-inversion-addition-chains-01), and the +delta from the library result. 
+ +| Name | This Library | Best Known | Delta | +| ---- | -----------: | ---------: | ----: | +| [Curve25519 Field Inversion](doc/results.md#curve25519-field-inversion) | 266 | 265 | +1 | +| [NIST P-256 Field Inversion](doc/results.md#nist-p-256-field-inversion) | 266 | 266 | **+0** | +| [NIST P-384 Field Inversion](doc/results.md#nist-p-384-field-inversion) | 397 | 396 | +1 | +| [secp256k1 (Bitcoin) Field Inversion](doc/results.md#secp256k1-bitcoin-field-inversion) | 269 | 269 | **+0** | +| [Curve25519 Scalar Inversion](doc/results.md#curve25519-scalar-inversion) | 283 | 284 | **-1** | +| [NIST P-256 Scalar Inversion](doc/results.md#nist-p-256-scalar-inversion) | 294 | 292 | +2 | +| [NIST P-384 Scalar Inversion](doc/results.md#nist-p-384-scalar-inversion) | 434 | 433 | +1 | +| [secp256k1 (Bitcoin) Scalar Inversion](doc/results.md#secp256k1-bitcoin-scalar-inversion) | 293 | 290 | +3 | + + +See [full results listing](doc/results.md) for more detail and +results for less common exponents. + +These results demonstrate that `addchain` is competitive with hand-optimized +chains, often with equivalent or better performance. Even when `addchain` is +slightly sub-optimal, it can still be considered valuable since it fully +automates a laborious manual process. As such, `addchain` can be trusted to +produce high quality results in an automated code generation tool. 
+ +## Usage + +### Command-line Interface + +Install a pre-compiled [release +binary](https://github.com/mmcloughlin/addchain/releases): + +``` +curl -sSfL https://git.io/addchain | sh -s -- -b /usr/local/bin +``` + +Alternatively build from source: + +``` +go install github.com/mmcloughlin/addchain/cmd/addchain@latest +``` + +Search for a curve25519 field inversion addition chain with: + +```sh +addchain search '2^255 - 19 - 2' +``` + +Output: + +``` +addchain: expr: "2^255 - 19 - 2" +addchain: hex: 7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeb +addchain: dec: 57896044618658097711785492504343953926634992332820282019728792003956564819947 +addchain: best: opt(runs(continued_fractions(dichotomic))) +addchain: cost: 266 +_10 = 2*1 +_11 = 1 + _10 +_1100 = _11 << 2 +_1111 = _11 + _1100 +_11110000 = _1111 << 4 +_11111111 = _1111 + _11110000 +x10 = _11111111 << 2 + _11 +x20 = x10 << 10 + x10 +x30 = x20 << 10 + x10 +x60 = x30 << 30 + x30 +x120 = x60 << 60 + x60 +x240 = x120 << 120 + x120 +x250 = x240 << 10 + x10 +return (x250 << 2 + 1) << 3 + _11 +``` + +Next, you can [generate code from this addition chain](doc/gen.md). + +### Library + +Install: + +``` +go get -u github.com/mmcloughlin/addchain +``` + +Algorithms all conform to the [`alg.ChainAlgorithm`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg#ChainAlgorithm) or +[`alg.SequenceAlgorithm`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg#SequenceAlgorithm) interfaces and can be used directly. However the +most user-friendly method uses the [`alg/ensemble`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg/ensemble) package to +instantiate a sensible default set of algorithms and the [`alg/exec`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg/exec) +helper to execute them in parallel. The following code uses this method to +find an addition chain for curve25519 field inversion: + +```go +func Example() { + // Target number: 2²⁵⁵ - 21. 
+ n := new(big.Int) + n.SetString("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeb", 16) + + // Default ensemble of algorithms. + algorithms := ensemble.Ensemble() + + // Use parallel executor. + ex := exec.NewParallel() + results := ex.Execute(n, algorithms) + + // Output best result. + best := 0 + for i, r := range results { + if r.Err != nil { + log.Fatal(r.Err) + } + if len(results[i].Program) < len(results[best].Program) { + best = i + } + } + r := results[best] + fmt.Printf("best: %d\n", len(r.Program)) + fmt.Printf("algorithm: %s\n", r.Algorithm) + + // Output: + // best: 266 + // algorithm: opt(runs(continued_fractions(dichotomic))) +} +``` + +## Algorithms + +This section summarizes the algorithms implemented by `addchain` along with +references to primary literature. See the [bibliography](doc/bibliography.md) +for the complete references list. + +### Binary + +The [`alg/binary`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg/binary) package implements the addition chain equivalent +of the basic [square-and-multiply exponentiation +method](https://en.wikipedia.org/wiki/Exponentiation_by_squaring). It is +included for completeness, but is almost always outperformed by more advanced +algorithms below. + +### Continued Fractions + +The [`alg/contfrac`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg/contfrac) package implements the continued fractions +methods for addition sequence search introduced by +Bergeron-Berstel-Brlek-Duboc in 1989 and later extended. This approach +utilizes a decomposition of an addition chain akin to continued fractions, +namely + +``` +(1,..., k,..., n) = (1,...,n mod k,..., k) ⊗ (1,..., n/k) ⊕ (n mod k). +``` + +for certain special operators ⊗ and ⊕. This +decomposition lends itself to a recursive algorithm for efficient addition +sequence search, with results dependent on the _strategy_ for choosing the +auxillary integer _k_. 
The [`alg/contfrac`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg/contfrac) package provides a +laundry list of strategies from the literature: binary, co-binary, +dichotomic, dyadic, fermat, square-root and total. + +#### References + +* F Bergeron, J Berstel, S Brlek and C Duboc. Addition chains using continued fractions. Journal of Algorithms. 1989. http://www-igm.univ-mlv.fr/~berstel/Articles/1989AdditionChainDuboc.pdf +* Bergeron, F., Berstel, J. and Brlek, S. Efficient computation of addition chains. Journal de theorie des nombres de Bordeaux. 1994. http://www.numdam.org/item/JTNB_1994__6_1_21_0 +* Amadou Tall and Ali Yassin Sanghare. Efficient computation of addition-subtraction chains using generalized continued Fractions. Cryptology ePrint Archive, Report 2013/466. 2013. https://eprint.iacr.org/2013/466 +* Christophe Doche. Exponentiation. Handbook of Elliptic and Hyperelliptic Curve Cryptography, chapter 9. 2006. http://koclab.cs.ucsb.edu/teaching/ecc/eccPapers/Doche-ch09.pdf + +### Bos-Coster Heuristics + +Bos and Coster described an iterative algorithm for efficient addition +sequence generation in which at each step a heuristic proposes new numbers +for the sequence in such a way that the _maximum_ number always decreases. +The [original Bos-Coster paper](https://link.springer.com/content/pdf/10.1007/0-387-34805-0_37.pdf) defined four +heuristics: Approximation, Divison, Halving and Lucas. Package +[`alg/heuristic`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg/heuristic) implements a variation on these heuristics: + +* **Approximation:** looks for two elements a, b in the current sequence with sum close to the largest element. +* **Halving:** applies when the target is at least twice as big as the next largest, and if so it will propose adding a sequence of doublings. +* **Delta Largest:** proposes adding the delta between the largest two entries in the current sequence. 
+ +Divison and Lucas are not implemented due to disparities in the literature +about their precise definition and poor results from early experiments. +Furthermore, this library does not apply weights to the heuristics as +suggested in the paper, rather it simply uses the first that applies. However +both of these remain [possible avenues for +improvement](https://github.com/mmcloughlin/addchain/issues/26). + +#### References + +* Bos, Jurjen and Coster, Matthijs. Addition Chain Heuristics. In Advances in Cryptology --- CRYPTO' 89 Proceedings, pages 400--407. 1990. https://link.springer.com/content/pdf/10.1007/0-387-34805-0_37.pdf +* Riad S. Wahby. kwantam/addchain. Github Repository. Apache License, Version 2.0. 2018. https://github.com/kwantam/addchain +* Christophe Doche. Exponentiation. Handbook of Elliptic and Hyperelliptic Curve Cryptography, chapter 9. 2006. http://koclab.cs.ucsb.edu/teaching/ecc/eccPapers/Doche-ch09.pdf +* Ayan Nandy. Modifications of Bos and Coster’s Heuristics in search of a shorter addition chain for faster exponentiation. Masters thesis, Indian Statistical Institute Kolkata. 2011. http://library.isical.ac.in:8080/jspui/bitstream/10263/6441/1/DISS-285.pdf +* F. L. Ţiplea, S. Iftene, C. Hriţcu, I. Goriac, R. Gordân and E. Erbiceanu. MpNT: A Multi-Precision Number Theory Package, Number Theoretical Algorithms (I). Technical Report TR03-02, Faculty of Computer Science, "Alexandru Ioan Cuza" University, Iasi. 2003. https://profs.info.uaic.ro/~tr/tr03-02.pdf +* Stam, Martijn. Speeding up subgroup cryptosystems. PhD thesis, Technische Universiteit Eindhoven. 2003. https://cr.yp.to/bib/2003/stam-thesis.pdf + +### Dictionary + +Dictionary methods decompose the binary representation of a target integer _n_ into a set of dictionary _terms_, such that _n_ +may be written as a sum + +
+n = ∑ 2ei di
+
+ +for exponents _e_ and elements _d_ from a dictionary _D_. Given such a decomposition we can construct an addition chain for _n_ by + +1. Find a short addition _sequence_ containing every element of the dictionary _D_. Continued fractions and Bos-Coster heuristics can be used here. +2. Build _n_ from the dictionary terms according to the sum decomposition. + +The efficiency of this approach boils down to the decomposition method. The [`alg/dict`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg/dict) package provides: + +* **Fixed Window:** binary representation of _n_ is broken into fixed _k_-bit windows +* **Sliding Window**: break _n_ into _k_-bit windows, skipping zeros where possible +* **Run Length**: decompose _n_ into runs of 1s up to a maximal length +* **Hybrid**: mix of sliding window and run length methods + +#### References + +* Martin Otto. Brauer addition-subtraction chains. PhD thesis, Universitat Paderborn. 2001. http://www.martin-otto.de/publications/docs/2001_MartinOtto_Diplom_BrauerAddition-SubtractionChains.pdf +* Kunihiro, Noboru and Yamamoto, Hirosuke. New Methods for Generating Short Addition Chains. IEICE Transactions on Fundamentals of Electronics Communications and Computer Sciences. 2000. https://pdfs.semanticscholar.org/b398/d10faca35af9ce5a6026458b251fd0a5640c.pdf +* Christophe Doche. Exponentiation. Handbook of Elliptic and Hyperelliptic Curve Cryptography, chapter 9. 2006. http://koclab.cs.ucsb.edu/teaching/ecc/eccPapers/Doche-ch09.pdf + +### Runs + +The runs algorithm is a custom variant of the dictionary approach that +decomposes a target into runs of ones. It leverages the observation that +building a dictionary consisting of runs of 1s of lengths +l1, l2, ..., lk can itself be +reduced to: + +1. Find an addition sequence containing the run lengths + li. As with dictionary approaches we can use + Bos-Coster heuristics and continued fractions here. 
However here we have the + advantage that the li are typically very _small_, + meaning that a wider range of algorithms can be brought to bear. +2. Use the addition sequence for the run lengths li + to build an addition sequence for the runs themselves + r(li) where r(e) = 2e-1. See + [`dict.RunsChain`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg/dict#RunsChain). + +This approach has proved highly effective against cryptographic exponents +which frequently exhibit binary structure, such as those derived from +[Solinas primes](https://en.wikipedia.org/wiki/Solinas_prime). + +> I have not seen this method discussed in the literature. Please help me find references to prior art if you know any. + +### Optimization + +Close inspection of addition chains produced by other algorithms revealed +cases of redundant computation. This motivated a final optimization pass over +addition chains to remove unecessary steps. The [`alg/opt`](https://pkg.go.dev/github.com/mmcloughlin/addchain/alg/opt) package +implements the following optimization: + +1. Determine _all possible_ ways each element can be computed from those prior. +2. Count how many times each element is used where it is the _only possible_ way of computing that entry. +3. Prune elements that are always used in computations that have an alternative. + +These micro-optimizations were vital in closing the gap between `addchain`'s +automated approaches and hand-optimized chains. This technique is reminiscent +of basic passes in optimizing compilers, raising the question of whether +other [compiler optimizations could apply to addition +chains](https://github.com/mmcloughlin/addchain/issues/24)? + +> I have not seen this method discussed in the literature. Please help me find references to prior art if you know any. + +## Citing + +If you use `addchain` in your research a citation would be appreciated. 
+Citing a specific release is preferred, since they are [archived on +Zenodo](https://doi.org/10.5281/zenodo.4625263) and assigned a DOI. Please use the +following BibTeX to cite the most recent [0.4.0 +release](https://github.com/mmcloughlin/addchain/releases/tag/v0.4.0). + +```bib +@misc{addchain, + title = {addchain: Cryptographic Addition Chain Generation in Go}, + author = {Michael B. McLoughlin}, + year = 2021, + month = oct, + howpublished = {Repository \url{https://github.com/mmcloughlin/addchain}}, + version = {0.4.0}, + license = {BSD 3-Clause License}, + doi = {10.5281/zenodo.5622943}, + url = {https://doi.org/10.5281/zenodo.5622943}, +} +``` + +If you need to cite a currently unreleased version please consider [filing an +issue](https://github.com/mmcloughlin/addchain/issues/new) to request a new +release, or to discuss an appropriate format for the citation. + +## Thanks + +Thank you to [Tom Dean](https://web.stanford.edu/~trdean/), [Riad +Wahby](https://wahby.org/), [Brian Smith](https://briansmith.org/) and +[str4d](https://github.com/str4d) for advice and encouragement. Thanks also to +[Damian Gryski](https://github.com/dgryski) and [Martin +Glancy](https://twitter.com/mglancy) for review. + +## Contributing + +Contributions to `addchain` are welcome: + +* [Submit bug reports](https://github.com/mmcloughlin/addchain/issues/new) to + the issues page. +* Suggest [test cases](https://github.com/mmcloughlin/addchain/blob/e6c070065205efcaa02627ab1b23e8ce6aeea1db/internal/results/results.go#L62) + or update best-known hand-optimized results. +* Pull requests accepted. Please discuss in the [issues section](https://github.com/mmcloughlin/addchain/issues) + before starting significant work. + +## License + +`addchain` is available under the [BSD 3-Clause License](LICENSE). 
diff --git a/vendor/github.com/mmcloughlin/addchain/acc/acc.go b/vendor/github.com/mmcloughlin/addchain/acc/acc.go new file mode 100644 index 00000000000..6963ff44368 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/acc.go @@ -0,0 +1,91 @@ +// Package acc implements the "addition chain calculator" language: a +// domain-specific language (DSL) for addition chain computation. +package acc + +import ( + "bytes" + "io" + "os" + "strings" + + "github.com/mmcloughlin/addchain/acc/ir" + "github.com/mmcloughlin/addchain/acc/parse" + "github.com/mmcloughlin/addchain/acc/pass" + "github.com/mmcloughlin/addchain/acc/printer" + "github.com/mmcloughlin/addchain/internal/errutil" +) + +// LoadFile is a convenience for loading an addition chain script from a file. +func LoadFile(filename string) (p *ir.Program, err error) { + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer errutil.CheckClose(&err, f) + return LoadReader(filename, f) +} + +// LoadString is a convenience for loading and evaluating an addition chain +// script from a string. +func LoadString(src string) (*ir.Program, error) { + return LoadReader("string", strings.NewReader(src)) +} + +// LoadReader is a convenience for loading and evaluating an addition chain +// script. +func LoadReader(filename string, r io.Reader) (*ir.Program, error) { + // Parse to AST. + s, err := parse.Reader(filename, r) + if err != nil { + return nil, err + } + + // Translate to IR. + p, err := Translate(s) + if err != nil { + return nil, err + } + + // Evaluate the program. + if err := pass.Eval(p); err != nil { + return nil, err + } + + return p, nil +} + +// Write is a convenience for writing a program as an addition chain script. +func Write(w io.Writer, p *ir.Program) error { + // Build AST. + s, err := Build(p) + if err != nil { + return err + } + + // Print. 
+ if err := printer.Fprint(w, s); err != nil { + return err + } + + return nil +} + +// Save is a convenience for writing a program to a file. +func Save(filename string, p *ir.Program) (err error) { + f, err := os.Create(filename) + if err != nil { + return err + } + defer errutil.CheckClose(&err, f) + return Write(f, p) +} + +// String is a convenience for obtaining a program as an addition chain script +// in string form. +func String(p *ir.Program) (string, error) { + var buf bytes.Buffer + if err := Write(&buf, p); err != nil { + return "", err + } + return buf.String(), nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/ast/ast.go b/vendor/github.com/mmcloughlin/addchain/acc/ast/ast.go new file mode 100644 index 00000000000..28948f55845 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/ast/ast.go @@ -0,0 +1,71 @@ +// Package ast declares abstract syntax tree types for acc programs. +package ast + +// Chain represents a sequence of acc statements for an addition chain +// computation. +type Chain struct { + Statements []Statement +} + +// Statement assigns the result of an expression to a variable. +type Statement struct { + Name Identifier + Expr Expr +} + +// Operator precedence range. +const ( + LowestPrec = 0 + HighestPrec = 4 +) + +// Expr is an expression. +type Expr interface { + Precedence() int +} + +// Operand is an index into an addition chain. +type Operand int + +// Precedence of this expression type. +func (Operand) Precedence() int { return HighestPrec } + +// Identifier is a variable reference. +type Identifier string + +// Precedence of this expression type. +func (Identifier) Precedence() int { return HighestPrec } + +// Add expression. +type Add struct { + X, Y Expr +} + +// Precedence of this expression type. +func (Add) Precedence() int { return 1 } + +// Shift (repeated doubling) expression. +type Shift struct { + X Expr + S uint +} + +// Precedence of this expression type. 
+func (Shift) Precedence() int { return 2 } + +// Double expression. +type Double struct { + X Expr +} + +// Precedence of this expression type. +func (Double) Precedence() int { return 3 } + +// IsOp reports whether the expression is the result of an operator. +func IsOp(e Expr) bool { + switch e.(type) { + case Add, Shift, Double: + return true + } + return false +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/ast/print.go b/vendor/github.com/mmcloughlin/addchain/acc/ast/print.go new file mode 100644 index 00000000000..4c4fcfea306 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/ast/print.go @@ -0,0 +1,101 @@ +package ast + +import ( + "io" + "os" + + "github.com/mmcloughlin/addchain/internal/errutil" + "github.com/mmcloughlin/addchain/internal/print" +) + +// Print an AST node to standard out. +func Print(n interface{}) error { + return Fprint(os.Stdout, n) +} + +// Fprint writes the AST node n to w. +func Fprint(w io.Writer, n interface{}) error { + p := newprinter(w) + p.node(n) + return p.Error() +} + +type printer struct { + print.Printer +} + +func newprinter(w io.Writer) *printer { + p := &printer{ + Printer: print.New(w), + } + p.SetIndentString(". 
") + return p +} + +func (p *printer) node(n interface{}) { + switch n := n.(type) { + case *Chain: + p.enter("chain") + for _, stmt := range n.Statements { + p.statement(stmt) + } + p.leave() + case Statement: + p.statement(n) + case Operand: + p.Linef("operand(%d)", n) + case Identifier: + p.Linef("identifier(%q)", n) + case Add: + p.add(n) + case Double: + p.double(n) + case Shift: + p.shift(n) + default: + p.SetError(errutil.UnexpectedType(n)) + } +} + +func (p *printer) statement(stmt Statement) { + p.enter("statement") + p.Printf("name = ") + p.node(stmt.Name) + p.Printf("expr = ") + p.node(stmt.Expr) + p.leave() +} + +func (p *printer) add(a Add) { + p.enter("add") + p.Printf("x = ") + p.node(a.X) + p.Printf("y = ") + p.node(a.Y) + p.leave() +} + +func (p *printer) double(d Double) { + p.enter("double") + p.Printf("x = ") + p.node(d.X) + p.leave() +} + +func (p *printer) shift(s Shift) { + p.enter("shift") + p.Linef("s = %d", s.S) + p.Printf("x = ") + p.node(s.X) + p.leave() +} + +func (p *printer) enter(name string) { + p.Linef("%s {", name) + p.Indent() +} + +func (p *printer) leave() { + p.Dedent() + p.Linef("}") +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/build.go b/vendor/github.com/mmcloughlin/addchain/acc/build.go new file mode 100644 index 00000000000..38adda34994 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/build.go @@ -0,0 +1,158 @@ +package acc + +import ( + "fmt" + + "github.com/mmcloughlin/addchain/acc/ast" + "github.com/mmcloughlin/addchain/acc/ir" + "github.com/mmcloughlin/addchain/acc/pass" + "github.com/mmcloughlin/addchain/internal/errutil" +) + +// complexitylimit is the maximum number of operators the builder will allow an +// expression to have. +const complexitylimit = 5 + +// Build AST from a program in intermediate representation. +func Build(p *ir.Program) (*ast.Chain, error) { + // Run some analysis passes first. 
+ err := pass.Exec(p, + pass.Func(pass.ReadCounts), + pass.NameByteValues, + pass.NameXRuns, + ) + if err != nil { + return nil, err + } + + // Delegate to builder. + b := newbuilder(p) + if err := b.process(); err != nil { + return nil, err + } + + return b.chain, nil +} + +type builder struct { + chain *ast.Chain + prog *ir.Program + expr map[int]ast.Expr +} + +func newbuilder(p *ir.Program) *builder { + return &builder{ + chain: &ast.Chain{}, + prog: p, + expr: map[int]ast.Expr{}, + } +} + +func (b *builder) process() error { + insts := b.prog.Instructions + n := len(insts) + complexity := 0 + for i := 0; i < n; i++ { + complexity++ + inst := insts[i] + out := inst.Output + + // Build expression for the result of this instruction. + e, err := b.operator(inst.Op) + if err != nil { + return err + } + + b.expr[out.Index] = e + + // If this output is read only by the following instruction, we don't need to + // commit it to a variable. + anon := out.Identifier == "" + usedonce := b.prog.ReadCount[out.Index] == 1 + usednext := i+1 < n && ir.HasInput(insts[i+1].Op, out.Index) + if anon && usedonce && usednext && complexity < complexitylimit { + continue + } + + // Otherwise write a statement for it. + b.commit(inst.Output) + complexity = 0 + } + + // Clear the name of the final statement. + b.chain.Statements[len(b.chain.Statements)-1].Name = "" + + return nil +} + +func (b *builder) operator(op ir.Op) (ast.Expr, error) { + switch o := op.(type) { + case ir.Add: + return b.add(o) + case ir.Double: + return ast.Double{ + X: b.operand(o.X), + }, nil + case ir.Shift: + return ast.Shift{ + X: b.operand(o.X), + S: o.S, + }, nil + default: + return nil, errutil.UnexpectedType(op) + } +} + +func (b *builder) add(a ir.Add) (ast.Expr, error) { + // Addition operator construction is slightly delcate, since operand order + // determines ordering of execution. 
By the design of instruction processing + // above, the only way we can have multi-operator expressions is with a + // sequence of operands that are used only once and in the following + // instruction. This implies that only one of x and y can be an operator + // expression. In order to preserve execution order, whichever one that is + // needs to be the first operand. + + x := b.operand(a.X) + y := b.operand(a.Y) + + switch { + case ast.IsOp(x) && ast.IsOp(y): + return nil, errutil.AssertionFailure("only one of x and y should be an operator expression") + case ast.IsOp(y): + x, y = y, x + case ast.IsOp(x): + // Nothing, it's already the first operand. + } + + return ast.Add{ + X: x, + Y: y, + }, nil +} + +func (b *builder) commit(op *ir.Operand) { + name := ast.Identifier(b.name(op)) + stmt := ast.Statement{ + Name: name, + Expr: b.operand(op), + } + b.chain.Statements = append(b.chain.Statements, stmt) + b.expr[op.Index] = name +} + +// name returns the name for this operand. This is the identifier if available, +// otherwise a sensible default based on the index. +func (b *builder) name(op *ir.Operand) string { + if op.Identifier != "" { + return op.Identifier + } + return fmt.Sprintf("i%d", op.Index) +} + +func (b *builder) operand(op *ir.Operand) ast.Expr { + e, ok := b.expr[op.Index] + if !ok { + return ast.Operand(op.Index) + } + return e +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/decompile.go b/vendor/github.com/mmcloughlin/addchain/acc/decompile.go new file mode 100644 index 00000000000..da05ed932de --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/decompile.go @@ -0,0 +1,58 @@ +package acc + +import ( + "github.com/mmcloughlin/addchain" + "github.com/mmcloughlin/addchain/acc/ir" +) + +// Decompile an unrolled program into concise intermediate representation. 
+func Decompile(p addchain.Program) (*ir.Program, error) { + numreads := p.ReadCounts() + r := &ir.Program{} + for i := 0; i < len(p); i++ { + op := p[i] + + // Regular addition. + if !op.IsDouble() { + r.AddInstruction(&ir.Instruction{ + Output: ir.Index(i + 1), + Op: ir.Add{ + X: ir.Index(op.I), + Y: ir.Index(op.J), + }, + }) + continue + } + + // We have a double. Look ahead to see if this is a chain of doublings, which + // can be encoded as a shift. Note we can only follow the the doublings as + // long as the intermediate values are not required anywhere else later in the + // program. + j := i + 1 + for ; j < len(p) && numreads[j] == 1 && p[j].I == j && p[j].J == j; j++ { + } + + s := j - i + + // Shift size 1 encoded as a double. + if s == 1 { + r.AddInstruction(&ir.Instruction{ + Output: ir.Index(i + 1), + Op: ir.Double{ + X: ir.Index(op.I), + }, + }) + continue + } + + i = j - 1 + r.AddInstruction(&ir.Instruction{ + Output: ir.Index(i + 1), + Op: ir.Shift{ + X: ir.Index(op.I), + S: uint(s), + }, + }) + } + return r, nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/ir/ir.go b/vendor/github.com/mmcloughlin/addchain/acc/ir/ir.go new file mode 100644 index 00000000000..5f54fec82b2 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/ir/ir.go @@ -0,0 +1,193 @@ +// Package ir declares an intermediate representation for acc programs. +package ir + +import ( + "fmt" + "strings" + + "github.com/mmcloughlin/addchain" +) + +// Program is a sequence of acc instructions. +type Program struct { + Instructions []*Instruction + + // Pass/analysis results. + Operands map[int]*Operand + ReadCount map[int]int + Program addchain.Program + Chain addchain.Chain + Temporaries []string +} + +// AddInstruction appends an instruction to the program. +func (p *Program) AddInstruction(i *Instruction) { + p.Instructions = append(p.Instructions, i) +} + +// Output returns the output of the last instruction. 
+func (p Program) Output() *Operand { + last := len(p.Instructions) - 1 + return p.Instructions[last].Output +} + +// Clone returns a copy of p. Pass results are not copied and would need to be +// rerun on the clone. +func (p Program) Clone() *Program { + c := &Program{} + for _, inst := range p.Instructions { + c.Instructions = append(c.Instructions, inst.Clone()) + } + return c +} + +func (p Program) String() string { + var b strings.Builder + for _, i := range p.Instructions { + fmt.Fprintln(&b, i) + } + return b.String() +} + +// Operand represents an element of an addition chain, with an optional +// identifier. +type Operand struct { + Identifier string + Index int +} + +// NewOperand builds a named operand for index i. +func NewOperand(name string, i int) *Operand { + return &Operand{ + Identifier: name, + Index: i, + } +} + +// Index builds an unnamed operand for index i. +func Index(i int) *Operand { + return NewOperand("", i) +} + +// One is the first element in the addition chain, which by definition always +// has the value 1. +var One = Index(0) + +// Clone returns a copy of the operand. +func (o Operand) Clone() *Operand { + clone := o + return &clone +} + +func (o Operand) String() string { + if len(o.Identifier) > 0 { + return o.Identifier + } + return fmt.Sprintf("[%d]", o.Index) +} + +// Instruction assigns the result of an operation to an operand. +type Instruction struct { + Output *Operand + Op Op +} + +// Operands returns the input and output operands. +func (i Instruction) Operands() []*Operand { + return append(i.Op.Inputs(), i.Output) +} + +// Clone returns a copy of the instruction. +func (i Instruction) Clone() *Instruction { + return &Instruction{ + Output: i.Output.Clone(), + Op: i.Op.Clone(), + } +} + +func (i Instruction) String() string { + return fmt.Sprintf("%s \u2190 %s", i.Output, i.Op) +} + +// Op is an operation. +type Op interface { + Inputs() []*Operand + Clone() Op + String() string +} + +// Add is an addition operation. 
+type Add struct { + X, Y *Operand +} + +// Inputs returns the addends. +func (a Add) Inputs() []*Operand { + return []*Operand{a.X, a.Y} +} + +// Clone returns a copy of the operation. +func (a Add) Clone() Op { + return Add{ + X: a.X.Clone(), + Y: a.Y.Clone(), + } +} + +func (a Add) String() string { + return fmt.Sprintf("%s + %s", a.X, a.Y) +} + +// Double is a double operation. +type Double struct { + X *Operand +} + +// Inputs returns the operand. +func (d Double) Inputs() []*Operand { + return []*Operand{d.X} +} + +// Clone returns a copy of the operation. +func (d Double) Clone() Op { + return Double{ + X: d.X.Clone(), + } +} + +func (d Double) String() string { + return fmt.Sprintf("2 * %s", d.X) +} + +// Shift represents a shift-left operation, equivalent to repeat doubling. +type Shift struct { + X *Operand + S uint +} + +// Inputs returns the operand to be shifted. +func (s Shift) Inputs() []*Operand { + return []*Operand{s.X} +} + +// Clone returns a copy of the operation. +func (s Shift) Clone() Op { + return Shift{ + X: s.X.Clone(), + S: s.S, + } +} + +func (s Shift) String() string { + return fmt.Sprintf("%s \u226a %d", s.X, s.S) +} + +// HasInput reports whether the given operation takes idx as an input. 
+func HasInput(op Op, idx int) bool { + for _, input := range op.Inputs() { + if input.Index == idx { + return true + } + } + return false +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/parse/acc.peg b/vendor/github.com/mmcloughlin/addchain/acc/parse/acc.peg new file mode 100644 index 00000000000..bd80d7e5789 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/parse/acc.peg @@ -0,0 +1,131 @@ +{ + +package parser + +func exprs(first, rest interface{}) []ast.Expr { + es := []ast.Expr{first.(ast.Expr)} + if rest == nil { + return es + } + for _, i := range rest.([]interface{}) { + es = append(es, i.([]interface{})[3].(ast.Expr)) + } + return es +} + +} + +// Chain + +Chain <- as:Assignment* r:Return _ EOF { + ch := &ast.Chain{} + for _, a := range as.([]interface{}) { + ch.Statements = append(ch.Statements, a.(ast.Statement)) + } + ch.Statements = append(ch.Statements, r.(ast.Statement)) + return ch, nil +} + +// Statements + +Assignment <- _ n:Identifier _ '=' _ e:Expr _ EOL { + return ast.Statement{ + Name: n.(ast.Identifier), + Expr: e.(ast.Expr), + }, nil +} + +Return <- _ ("return" __)? e:Expr _ EOL? 
{ + return ast.Statement{ + Name: "", + Expr: e.(ast.Expr), + }, nil +} + +// Expressions + +Expr <- e:AddExpr { + return e, nil +} + +AddExpr <- _ x:ShiftExpr rest:(_ AddOperator _ ShiftExpr)* _ { + es := exprs(x, rest) + r := es[0] + for _, e := range es[1:] { + r = ast.Add{ + X: r, + Y: e, + } + } + return r, nil +} + +ShiftExpr <- _ x:BaseExpr _ ShiftOperator _ s:UintLiteral _ { + return ast.Shift{ + X: x.(ast.Expr), + S: s.(uint), + }, nil +} / _ DoubleOperator _ x:BaseExpr { + return ast.Double{ + X: x.(ast.Expr), + }, nil +} / BaseExpr + +BaseExpr <- ParenExpr / Operand + +ParenExpr <- '(' _ e:Expr _ ')' { + return e, nil +} + +// Operators + +AddOperator <- '+' / "add" + +ShiftOperator <- "<<" / "shl" + +DoubleOperator <- '2' _ '*' / "dbl" + +// Operands + +Operand <- op:( One / Index / Identifier ) { + return op, nil +} + +One <- '1' { + return ast.Operand(0), nil +} + +Index <- '[' _ idx:UintLiteral _ ']' { + return ast.Operand(idx.(uint)), nil +} + +// Identifiers + +Identifier <- [a-zA-Z_] [a-zA-Z0-9_]* { + return ast.Identifier(c.text), nil +} + +// Primitives + +UintLiteral <- u64:Uint64Literal { + return uint(u64.(uint64)), nil +} + +Uint64Literal <- (HexUintLiteral / OctalUintLiteral / DecimalUintLiteral) { + return strconv.ParseUint(string(c.text), 0, 64) +} + +DecimalUintLiteral <- [0-9]+ + +HexUintLiteral <- "0x" [0-9a-fA-F]+ + +OctalUintLiteral <- '0' [0-7]+ + +// Character classes + +__ <- Whitespace+ +_ <- Whitespace* + +Whitespace <- [ \t\r] +EOL <- '\n' +EOF <- !. diff --git a/vendor/github.com/mmcloughlin/addchain/acc/parse/internal/parser/zparser.go b/vendor/github.com/mmcloughlin/addchain/acc/parse/internal/parser/zparser.go new file mode 100644 index 00000000000..88ce3a8f618 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/parse/internal/parser/zparser.go @@ -0,0 +1,2203 @@ +// Code generated by pigeon; DO NOT EDIT. 
+ +package parser + +import ( + "bytes" + "errors" + "fmt" + "io" + "io/ioutil" + "math" + "os" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" + + "github.com/mmcloughlin/addchain/acc/ast" +) + +func exprs(first, rest interface{}) []ast.Expr { + es := []ast.Expr{first.(ast.Expr)} + if rest == nil { + return es + } + for _, i := range rest.([]interface{}) { + es = append(es, i.([]interface{})[3].(ast.Expr)) + } + return es +} + +var g = &grammar{ + rules: []*rule{ + { + name: "Chain", + pos: position{line: 20, col: 1, offset: 290}, + expr: &actionExpr{ + pos: position{line: 20, col: 10, offset: 299}, + run: (*parser).callonChain1, + expr: &seqExpr{ + pos: position{line: 20, col: 10, offset: 299}, + exprs: []interface{}{ + &labeledExpr{ + pos: position{line: 20, col: 10, offset: 299}, + label: "as", + expr: &zeroOrMoreExpr{ + pos: position{line: 20, col: 13, offset: 302}, + expr: &ruleRefExpr{ + pos: position{line: 20, col: 13, offset: 302}, + name: "Assignment", + }, + }, + }, + &labeledExpr{ + pos: position{line: 20, col: 25, offset: 314}, + label: "r", + expr: &ruleRefExpr{ + pos: position{line: 20, col: 27, offset: 316}, + name: "Return", + }, + }, + &ruleRefExpr{ + pos: position{line: 20, col: 34, offset: 323}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 20, col: 36, offset: 325}, + name: "EOF", + }, + }, + }, + }, + }, + { + name: "Assignment", + pos: position{line: 31, col: 1, offset: 566}, + expr: &actionExpr{ + pos: position{line: 31, col: 15, offset: 580}, + run: (*parser).callonAssignment1, + expr: &seqExpr{ + pos: position{line: 31, col: 15, offset: 580}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 31, col: 15, offset: 580}, + name: "_", + }, + &labeledExpr{ + pos: position{line: 31, col: 17, offset: 582}, + label: "n", + expr: &ruleRefExpr{ + pos: position{line: 31, col: 19, offset: 584}, + name: "Identifier", + }, + }, + &ruleRefExpr{ + pos: position{line: 31, col: 30, offset: 595}, + name: "_", + }, 
+ &litMatcher{ + pos: position{line: 31, col: 32, offset: 597}, + val: "=", + ignoreCase: false, + want: "\"=\"", + }, + &ruleRefExpr{ + pos: position{line: 31, col: 36, offset: 601}, + name: "_", + }, + &labeledExpr{ + pos: position{line: 31, col: 38, offset: 603}, + label: "e", + expr: &ruleRefExpr{ + pos: position{line: 31, col: 40, offset: 605}, + name: "Expr", + }, + }, + &ruleRefExpr{ + pos: position{line: 31, col: 45, offset: 610}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 31, col: 47, offset: 612}, + name: "EOL", + }, + }, + }, + }, + }, + { + name: "Return", + pos: position{line: 38, col: 1, offset: 720}, + expr: &actionExpr{ + pos: position{line: 38, col: 11, offset: 730}, + run: (*parser).callonReturn1, + expr: &seqExpr{ + pos: position{line: 38, col: 11, offset: 730}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 38, col: 11, offset: 730}, + name: "_", + }, + &zeroOrOneExpr{ + pos: position{line: 38, col: 13, offset: 732}, + expr: &seqExpr{ + pos: position{line: 38, col: 14, offset: 733}, + exprs: []interface{}{ + &litMatcher{ + pos: position{line: 38, col: 14, offset: 733}, + val: "return", + ignoreCase: false, + want: "\"return\"", + }, + &ruleRefExpr{ + pos: position{line: 38, col: 23, offset: 742}, + name: "__", + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 38, col: 28, offset: 747}, + label: "e", + expr: &ruleRefExpr{ + pos: position{line: 38, col: 30, offset: 749}, + name: "Expr", + }, + }, + &ruleRefExpr{ + pos: position{line: 38, col: 35, offset: 754}, + name: "_", + }, + &zeroOrOneExpr{ + pos: position{line: 38, col: 37, offset: 756}, + expr: &ruleRefExpr{ + pos: position{line: 38, col: 37, offset: 756}, + name: "EOL", + }, + }, + }, + }, + }, + }, + { + name: "Expr", + pos: position{line: 47, col: 1, offset: 865}, + expr: &actionExpr{ + pos: position{line: 47, col: 9, offset: 873}, + run: (*parser).callonExpr1, + expr: &labeledExpr{ + pos: position{line: 47, col: 9, offset: 873}, + label: "e", + expr: 
&ruleRefExpr{ + pos: position{line: 47, col: 11, offset: 875}, + name: "AddExpr", + }, + }, + }, + }, + { + name: "AddExpr", + pos: position{line: 51, col: 1, offset: 906}, + expr: &actionExpr{ + pos: position{line: 51, col: 12, offset: 917}, + run: (*parser).callonAddExpr1, + expr: &seqExpr{ + pos: position{line: 51, col: 12, offset: 917}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 51, col: 12, offset: 917}, + name: "_", + }, + &labeledExpr{ + pos: position{line: 51, col: 14, offset: 919}, + label: "x", + expr: &ruleRefExpr{ + pos: position{line: 51, col: 16, offset: 921}, + name: "ShiftExpr", + }, + }, + &labeledExpr{ + pos: position{line: 51, col: 26, offset: 931}, + label: "rest", + expr: &zeroOrMoreExpr{ + pos: position{line: 51, col: 31, offset: 936}, + expr: &seqExpr{ + pos: position{line: 51, col: 32, offset: 937}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 51, col: 32, offset: 937}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 51, col: 34, offset: 939}, + name: "AddOperator", + }, + &ruleRefExpr{ + pos: position{line: 51, col: 46, offset: 951}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 51, col: 48, offset: 953}, + name: "ShiftExpr", + }, + }, + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 51, col: 60, offset: 965}, + name: "_", + }, + }, + }, + }, + }, + { + name: "ShiftExpr", + pos: position{line: 63, col: 1, offset: 1134}, + expr: &choiceExpr{ + pos: position{line: 63, col: 14, offset: 1147}, + alternatives: []interface{}{ + &actionExpr{ + pos: position{line: 63, col: 14, offset: 1147}, + run: (*parser).callonShiftExpr2, + expr: &seqExpr{ + pos: position{line: 63, col: 14, offset: 1147}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 63, col: 14, offset: 1147}, + name: "_", + }, + &labeledExpr{ + pos: position{line: 63, col: 16, offset: 1149}, + label: "x", + expr: &ruleRefExpr{ + pos: position{line: 63, col: 18, offset: 1151}, + name: "BaseExpr", + }, + }, + 
&ruleRefExpr{ + pos: position{line: 63, col: 27, offset: 1160}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 63, col: 29, offset: 1162}, + name: "ShiftOperator", + }, + &ruleRefExpr{ + pos: position{line: 63, col: 43, offset: 1176}, + name: "_", + }, + &labeledExpr{ + pos: position{line: 63, col: 45, offset: 1178}, + label: "s", + expr: &ruleRefExpr{ + pos: position{line: 63, col: 47, offset: 1180}, + name: "UintLiteral", + }, + }, + &ruleRefExpr{ + pos: position{line: 63, col: 59, offset: 1192}, + name: "_", + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 68, col: 5, offset: 1279}, + run: (*parser).callonShiftExpr13, + expr: &seqExpr{ + pos: position{line: 68, col: 5, offset: 1279}, + exprs: []interface{}{ + &ruleRefExpr{ + pos: position{line: 68, col: 5, offset: 1279}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 68, col: 7, offset: 1281}, + name: "DoubleOperator", + }, + &ruleRefExpr{ + pos: position{line: 68, col: 22, offset: 1296}, + name: "_", + }, + &labeledExpr{ + pos: position{line: 68, col: 24, offset: 1298}, + label: "x", + expr: &ruleRefExpr{ + pos: position{line: 68, col: 26, offset: 1300}, + name: "BaseExpr", + }, + }, + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 72, col: 5, offset: 1374}, + name: "BaseExpr", + }, + }, + }, + }, + { + name: "BaseExpr", + pos: position{line: 74, col: 1, offset: 1384}, + expr: &choiceExpr{ + pos: position{line: 74, col: 13, offset: 1396}, + alternatives: []interface{}{ + &ruleRefExpr{ + pos: position{line: 74, col: 13, offset: 1396}, + name: "ParenExpr", + }, + &ruleRefExpr{ + pos: position{line: 74, col: 25, offset: 1408}, + name: "Operand", + }, + }, + }, + }, + { + name: "ParenExpr", + pos: position{line: 76, col: 1, offset: 1417}, + expr: &actionExpr{ + pos: position{line: 76, col: 14, offset: 1430}, + run: (*parser).callonParenExpr1, + expr: &seqExpr{ + pos: position{line: 76, col: 14, offset: 1430}, + exprs: []interface{}{ + &litMatcher{ + pos: position{line: 76, col: 14, 
offset: 1430}, + val: "(", + ignoreCase: false, + want: "\"(\"", + }, + &ruleRefExpr{ + pos: position{line: 76, col: 18, offset: 1434}, + name: "_", + }, + &labeledExpr{ + pos: position{line: 76, col: 20, offset: 1436}, + label: "e", + expr: &ruleRefExpr{ + pos: position{line: 76, col: 22, offset: 1438}, + name: "Expr", + }, + }, + &ruleRefExpr{ + pos: position{line: 76, col: 27, offset: 1443}, + name: "_", + }, + &litMatcher{ + pos: position{line: 76, col: 29, offset: 1445}, + val: ")", + ignoreCase: false, + want: "\")\"", + }, + }, + }, + }, + }, + { + name: "AddOperator", + pos: position{line: 82, col: 1, offset: 1486}, + expr: &choiceExpr{ + pos: position{line: 82, col: 16, offset: 1501}, + alternatives: []interface{}{ + &litMatcher{ + pos: position{line: 82, col: 16, offset: 1501}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 82, col: 22, offset: 1507}, + val: "add", + ignoreCase: false, + want: "\"add\"", + }, + }, + }, + }, + { + name: "ShiftOperator", + pos: position{line: 84, col: 1, offset: 1514}, + expr: &choiceExpr{ + pos: position{line: 84, col: 18, offset: 1531}, + alternatives: []interface{}{ + &litMatcher{ + pos: position{line: 84, col: 18, offset: 1531}, + val: "<<", + ignoreCase: false, + want: "\"<<\"", + }, + &litMatcher{ + pos: position{line: 84, col: 25, offset: 1538}, + val: "shl", + ignoreCase: false, + want: "\"shl\"", + }, + }, + }, + }, + { + name: "DoubleOperator", + pos: position{line: 86, col: 1, offset: 1545}, + expr: &choiceExpr{ + pos: position{line: 86, col: 19, offset: 1563}, + alternatives: []interface{}{ + &seqExpr{ + pos: position{line: 86, col: 19, offset: 1563}, + exprs: []interface{}{ + &litMatcher{ + pos: position{line: 86, col: 19, offset: 1563}, + val: "2", + ignoreCase: false, + want: "\"2\"", + }, + &ruleRefExpr{ + pos: position{line: 86, col: 23, offset: 1567}, + name: "_", + }, + &litMatcher{ + pos: position{line: 86, col: 25, offset: 1569}, + val: "*", + ignoreCase: 
false, + want: "\"*\"", + }, + }, + }, + &litMatcher{ + pos: position{line: 86, col: 31, offset: 1575}, + val: "dbl", + ignoreCase: false, + want: "\"dbl\"", + }, + }, + }, + }, + { + name: "Operand", + pos: position{line: 90, col: 1, offset: 1595}, + expr: &actionExpr{ + pos: position{line: 90, col: 12, offset: 1606}, + run: (*parser).callonOperand1, + expr: &labeledExpr{ + pos: position{line: 90, col: 12, offset: 1606}, + label: "op", + expr: &choiceExpr{ + pos: position{line: 90, col: 17, offset: 1611}, + alternatives: []interface{}{ + &ruleRefExpr{ + pos: position{line: 90, col: 17, offset: 1611}, + name: "One", + }, + &ruleRefExpr{ + pos: position{line: 90, col: 23, offset: 1617}, + name: "Index", + }, + &ruleRefExpr{ + pos: position{line: 90, col: 31, offset: 1625}, + name: "Identifier", + }, + }, + }, + }, + }, + }, + { + name: "One", + pos: position{line: 94, col: 1, offset: 1662}, + expr: &actionExpr{ + pos: position{line: 94, col: 8, offset: 1669}, + run: (*parser).callonOne1, + expr: &litMatcher{ + pos: position{line: 94, col: 8, offset: 1669}, + val: "1", + ignoreCase: false, + want: "\"1\"", + }, + }, + }, + { + name: "Index", + pos: position{line: 98, col: 1, offset: 1709}, + expr: &actionExpr{ + pos: position{line: 98, col: 10, offset: 1718}, + run: (*parser).callonIndex1, + expr: &seqExpr{ + pos: position{line: 98, col: 10, offset: 1718}, + exprs: []interface{}{ + &litMatcher{ + pos: position{line: 98, col: 10, offset: 1718}, + val: "[", + ignoreCase: false, + want: "\"[\"", + }, + &ruleRefExpr{ + pos: position{line: 98, col: 14, offset: 1722}, + name: "_", + }, + &labeledExpr{ + pos: position{line: 98, col: 16, offset: 1724}, + label: "idx", + expr: &ruleRefExpr{ + pos: position{line: 98, col: 20, offset: 1728}, + name: "UintLiteral", + }, + }, + &ruleRefExpr{ + pos: position{line: 98, col: 32, offset: 1740}, + name: "_", + }, + &litMatcher{ + pos: position{line: 98, col: 34, offset: 1742}, + val: "]", + ignoreCase: false, + want: "\"]\"", + }, + 
}, + }, + }, + }, + { + name: "Identifier", + pos: position{line: 104, col: 1, offset: 1807}, + expr: &actionExpr{ + pos: position{line: 104, col: 15, offset: 1821}, + run: (*parser).callonIdentifier1, + expr: &seqExpr{ + pos: position{line: 104, col: 15, offset: 1821}, + exprs: []interface{}{ + &charClassMatcher{ + pos: position{line: 104, col: 15, offset: 1821}, + val: "[a-zA-Z_]", + chars: []rune{'_'}, + ranges: []rune{'a', 'z', 'A', 'Z'}, + ignoreCase: false, + inverted: false, + }, + &zeroOrMoreExpr{ + pos: position{line: 104, col: 25, offset: 1831}, + expr: &charClassMatcher{ + pos: position{line: 104, col: 25, offset: 1831}, + val: "[a-zA-Z0-9_]", + chars: []rune{'_'}, + ranges: []rune{'a', 'z', 'A', 'Z', '0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + }, + }, + }, + { + name: "UintLiteral", + pos: position{line: 110, col: 1, offset: 1904}, + expr: &actionExpr{ + pos: position{line: 110, col: 16, offset: 1919}, + run: (*parser).callonUintLiteral1, + expr: &labeledExpr{ + pos: position{line: 110, col: 16, offset: 1919}, + label: "u64", + expr: &ruleRefExpr{ + pos: position{line: 110, col: 20, offset: 1923}, + name: "Uint64Literal", + }, + }, + }, + }, + { + name: "Uint64Literal", + pos: position{line: 114, col: 1, offset: 1977}, + expr: &actionExpr{ + pos: position{line: 114, col: 18, offset: 1994}, + run: (*parser).callonUint64Literal1, + expr: &choiceExpr{ + pos: position{line: 114, col: 19, offset: 1995}, + alternatives: []interface{}{ + &ruleRefExpr{ + pos: position{line: 114, col: 19, offset: 1995}, + name: "HexUintLiteral", + }, + &ruleRefExpr{ + pos: position{line: 114, col: 36, offset: 2012}, + name: "OctalUintLiteral", + }, + &ruleRefExpr{ + pos: position{line: 114, col: 55, offset: 2031}, + name: "DecimalUintLiteral", + }, + }, + }, + }, + }, + { + name: "DecimalUintLiteral", + pos: position{line: 118, col: 1, offset: 2108}, + expr: &oneOrMoreExpr{ + pos: position{line: 118, col: 23, offset: 2130}, + expr: &charClassMatcher{ + 
pos: position{line: 118, col: 23, offset: 2130}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + { + name: "HexUintLiteral", + pos: position{line: 120, col: 1, offset: 2138}, + expr: &seqExpr{ + pos: position{line: 120, col: 19, offset: 2156}, + exprs: []interface{}{ + &litMatcher{ + pos: position{line: 120, col: 19, offset: 2156}, + val: "0x", + ignoreCase: false, + want: "\"0x\"", + }, + &oneOrMoreExpr{ + pos: position{line: 120, col: 24, offset: 2161}, + expr: &charClassMatcher{ + pos: position{line: 120, col: 24, offset: 2161}, + val: "[0-9a-fA-F]", + ranges: []rune{'0', '9', 'a', 'f', 'A', 'F'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + }, + }, + { + name: "OctalUintLiteral", + pos: position{line: 122, col: 1, offset: 2175}, + expr: &seqExpr{ + pos: position{line: 122, col: 21, offset: 2195}, + exprs: []interface{}{ + &litMatcher{ + pos: position{line: 122, col: 21, offset: 2195}, + val: "0", + ignoreCase: false, + want: "\"0\"", + }, + &oneOrMoreExpr{ + pos: position{line: 122, col: 25, offset: 2199}, + expr: &charClassMatcher{ + pos: position{line: 122, col: 25, offset: 2199}, + val: "[0-7]", + ranges: []rune{'0', '7'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + }, + }, + { + name: "__", + pos: position{line: 126, col: 1, offset: 2229}, + expr: &oneOrMoreExpr{ + pos: position{line: 126, col: 7, offset: 2235}, + expr: &ruleRefExpr{ + pos: position{line: 126, col: 7, offset: 2235}, + name: "Whitespace", + }, + }, + }, + { + name: "_", + pos: position{line: 127, col: 1, offset: 2247}, + expr: &zeroOrMoreExpr{ + pos: position{line: 127, col: 6, offset: 2252}, + expr: &ruleRefExpr{ + pos: position{line: 127, col: 6, offset: 2252}, + name: "Whitespace", + }, + }, + }, + { + name: "Whitespace", + pos: position{line: 129, col: 1, offset: 2265}, + expr: &charClassMatcher{ + pos: position{line: 129, col: 15, offset: 2279}, + val: "[ \\t\\r]", + chars: []rune{' ', '\t', '\r'}, + 
ignoreCase: false, + inverted: false, + }, + }, + { + name: "EOL", + pos: position{line: 130, col: 1, offset: 2287}, + expr: &litMatcher{ + pos: position{line: 130, col: 8, offset: 2294}, + val: "\n", + ignoreCase: false, + want: "\"\\n\"", + }, + }, + { + name: "EOF", + pos: position{line: 131, col: 1, offset: 2299}, + expr: ¬Expr{ + pos: position{line: 131, col: 8, offset: 2306}, + expr: &anyMatcher{ + line: 131, col: 9, offset: 2307, + }, + }, + }, + }, +} + +func (c *current) onChain1(as, r interface{}) (interface{}, error) { + ch := &ast.Chain{} + for _, a := range as.([]interface{}) { + ch.Statements = append(ch.Statements, a.(ast.Statement)) + } + ch.Statements = append(ch.Statements, r.(ast.Statement)) + return ch, nil +} + +func (p *parser) callonChain1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onChain1(stack["as"], stack["r"]) +} + +func (c *current) onAssignment1(n, e interface{}) (interface{}, error) { + return ast.Statement{ + Name: n.(ast.Identifier), + Expr: e.(ast.Expr), + }, nil +} + +func (p *parser) callonAssignment1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onAssignment1(stack["n"], stack["e"]) +} + +func (c *current) onReturn1(e interface{}) (interface{}, error) { + return ast.Statement{ + Name: "", + Expr: e.(ast.Expr), + }, nil +} + +func (p *parser) callonReturn1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onReturn1(stack["e"]) +} + +func (c *current) onExpr1(e interface{}) (interface{}, error) { + return e, nil +} + +func (p *parser) callonExpr1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onExpr1(stack["e"]) +} + +func (c *current) onAddExpr1(x, rest interface{}) (interface{}, error) { + es := exprs(x, rest) + r := es[0] + for _, e := range es[1:] { + r = ast.Add{ + X: r, + Y: e, + } + } + return r, nil +} + +func (p *parser) callonAddExpr1() (interface{}, 
error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onAddExpr1(stack["x"], stack["rest"]) +} + +func (c *current) onShiftExpr2(x, s interface{}) (interface{}, error) { + return ast.Shift{ + X: x.(ast.Expr), + S: s.(uint), + }, nil +} + +func (p *parser) callonShiftExpr2() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onShiftExpr2(stack["x"], stack["s"]) +} + +func (c *current) onShiftExpr13(x interface{}) (interface{}, error) { + return ast.Double{ + X: x.(ast.Expr), + }, nil +} + +func (p *parser) callonShiftExpr13() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onShiftExpr13(stack["x"]) +} + +func (c *current) onParenExpr1(e interface{}) (interface{}, error) { + return e, nil +} + +func (p *parser) callonParenExpr1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onParenExpr1(stack["e"]) +} + +func (c *current) onOperand1(op interface{}) (interface{}, error) { + return op, nil +} + +func (p *parser) callonOperand1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onOperand1(stack["op"]) +} + +func (c *current) onOne1() (interface{}, error) { + return ast.Operand(0), nil +} + +func (p *parser) callonOne1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onOne1() +} + +func (c *current) onIndex1(idx interface{}) (interface{}, error) { + return ast.Operand(idx.(uint)), nil +} + +func (p *parser) callonIndex1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onIndex1(stack["idx"]) +} + +func (c *current) onIdentifier1() (interface{}, error) { + return ast.Identifier(c.text), nil +} + +func (p *parser) callonIdentifier1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onIdentifier1() +} + +func (c *current) onUintLiteral1(u64 interface{}) (interface{}, error) { + return 
uint(u64.(uint64)), nil +} + +func (p *parser) callonUintLiteral1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onUintLiteral1(stack["u64"]) +} + +func (c *current) onUint64Literal1() (interface{}, error) { + return strconv.ParseUint(string(c.text), 0, 64) +} + +func (p *parser) callonUint64Literal1() (interface{}, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onUint64Literal1() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. + errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. 
+// +// The default is to start parsing at the first rule in the grammar. +func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// Statistics adds a user provided Stats struct to the parser to allow +// the user to process the results after the parsing has finished. +// Also the key for the "no match" counter is set. +// +// Example usage: +// +// input := "input" +// stats := Stats{} +// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match")) +// if err != nil { +// log.Panicln(err) +// } +// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ") +// if err != nil { +// log.Panicln(err) +// } +// fmt.Println(string(b)) +// +func Statistics(stats *Stats, choiceNoMatch string) Option { + return func(p *parser) Option { + oldStats := p.Stats + p.Stats = stats + oldChoiceNoMatch := p.choiceNoMatch + p.choiceNoMatch = choiceNoMatch + if p.Stats.ChoiceAltCnt == nil { + p.Stats.ChoiceAltCnt = make(map[string]map[string]int) + } + return Statistics(oldStats, oldChoiceNoMatch) + } +} + +// Debug creates an Option to set the debug flag to b. When set to true, +// debugging information is printed to stdout while parsing. +// +// The default is false. +func Debug(b bool) Option { + return func(p *parser) Option { + old := p.debug + p.debug = b + return Debug(old) + } +} + +// Memoize creates an Option to set the memoize flag to b. When set to true, +// the parser will cache all results so each expression is evaluated only +// once. This guarantees linear parsing time even for pathological cases, +// at the expense of more memory and slower times for typical cases. +// +// The default is false. 
+func Memoize(b bool) Option { + return func(p *parser) Option { + old := p.memoize + p.memoize = b + return Memoize(old) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. +// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. +func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value interface{}) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value interface{}) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i interface{}, err error) { + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) 
+} + +// ParseReader parses the data from r using filename as information in the +// error messages. +func ParseReader(filename string, r io.Reader, opts ...Option) (interface{}, error) { + b, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (interface{}, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. +type position struct { + line, col, offset int +} + +func (p position) String() string { + return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]" +} + +// savepoint stores all state required to go back to this point in the +// parser. +type savepoint struct { + position + rn rune + w int +} + +type current struct { + pos position // start position of the match + text []byte // raw text of the match + + // state is a store for arbitrary key,value pairs that the user wants to be + // tied to the backtracking of the parser. + // This is always rolled back if a parsing rule fails. + state storeDict + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict +} + +type storeDict map[string]interface{} + +// the AST types... 
+ +type grammar struct { + pos position + rules []*rule +} + +type rule struct { + pos position + name string + displayName string + expr interface{} +} + +type choiceExpr struct { + pos position + alternatives []interface{} +} + +type actionExpr struct { + pos position + expr interface{} + run func(*parser) (interface{}, error) +} + +type recoveryExpr struct { + pos position + expr interface{} + recoverExpr interface{} + failureLabel []string +} + +type seqExpr struct { + pos position + exprs []interface{} +} + +type throwExpr struct { + pos position + label string +} + +type labeledExpr struct { + pos position + label string + expr interface{} +} + +type expr struct { + pos position + expr interface{} +} + +type andExpr expr +type notExpr expr +type zeroOrOneExpr expr +type zeroOrMoreExpr expr +type oneOrMoreExpr expr + +type ruleRefExpr struct { + pos position + name string +} + +type stateCodeExpr struct { + pos position + run func(*parser) error +} + +type andCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +type notCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +type litMatcher struct { + pos position + val string + ignoreCase bool + want string +} + +type charClassMatcher struct { + pos position + val string + basicLatinChars [128]bool + chars []rune + ranges []rune + classes []*unicode.RangeTable + ignoreCase bool + inverted bool +} + +type anyMatcher position + +// errList cumulates the errors found by the parser. 
+type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +type resultTuple struct { + v interface{} + b bool + end savepoint +} + +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + debug bool + + memoize bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[interface{}]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]interface{} + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]interface{} +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]interface{}) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr interface{}) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]interface{}, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) print(prefix, s string) string { + if !p.debug { + return s + } + + fmt.Printf("%s %d:%d:%d: %s [%#U]\n", + prefix, p.pt.line, p.pt.col, p.pt.offset, s, p.pt.rn) + return s +} + +func (p *parser) in(s string) string { + p.depth++ + return p.print(strings.Repeat(" ", p.depth)+">", s) +} + +func (p *parser) out(s string) string { + p.depth-- + return p.print(strings.Repeat(" ", p.depth)+"<", s) +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := 
&parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if p.debug { + defer p.out(p.in("restore")) + } + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() interface{} +} + +var statePool = &sync.Pool{ + New: func() interface{} { return make(storeDict) }, +} + +func (sd storeDict) Discard() { + for k := range sd { + delete(sd, k) + } + statePool.Put(sd) +} + +// clone and return parser current state. 
+func (p *parser) cloneState() storeDict { + if p.debug { + defer p.out(p.in("cloneState")) + } + + state := statePool.Get().(storeDict) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + if p.debug { + defer p.out(p.in("restoreState")) + } + p.cur.state.Discard() + p.cur.state = state +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node interface{}) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node interface{}, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[interface{}]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[interface{}]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +func (p *parser) parse(g *grammar) (val interface{}, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. 
+ defer func() { + if e := recover(); e != nil { + if p.debug { + defer p.out(p.in("panic handler")) + } + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRule(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRule(rule *rule) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + + if p.memoize { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + } + + start := p.pt + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExpr(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + if ok && p.debug { + p.print(strings.Repeat(" ", p.depth)+"MATCH", 
string(p.sliceFrom(start))) + } + + if p.memoize { + p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +func (p *parser) parseExpr(expr interface{}) (interface{}, bool) { + var pt savepoint + + if p.memoize { + res, ok := p.getMemoized(expr) + if ok { + p.restore(res.end) + return res.v, res.b + } + pt = p.pt + } + + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val interface{} + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseActionExpr")) + } + + start := p.pt + val, ok := p.parseExpr(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + state := p.cloneState() + actVal, err := 
act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + p.restoreState(state) + + val = actVal + } + if ok && p.debug { + p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + } + return val, ok +} + +func (p *parser) parseAndCodeExpr(and *andCodeExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseAndCodeExpr")) + } + + state := p.cloneState() + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseAndExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + _, ok := p.parseExpr(and.expr) + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseAnyMatcher")) + } + + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseCharClassMatcher")) + } + + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= 
chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) { + choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col) + m := p.ChoiceAltCnt[choiceIdent] + if m == nil { + m = make(map[string]int) + p.ChoiceAltCnt[choiceIdent] = m + } + // We increment altI by 1, so the keys do not start at 0 + alt := strconv.Itoa(altI + 1) + if altI == choiceNoMatch { + alt = p.choiceNoMatch + } + m[alt]++ +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseChoiceExpr")) + } + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + + p.pushV() + val, ok := p.parseExpr(alt) + p.popV() + if ok { + p.incChoiceAltCnt(ch, altI) + return val, ok + } + p.restoreState(state) + } + p.incChoiceAltCnt(ch, choiceNoMatch) + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseLabeledExpr")) + } + + p.pushV() + val, ok := p.parseExpr(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (interface{}, bool) { + if p.debug { + defer 
p.out(p.in("parseLitMatcher")) + } + + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseNotCodeExpr")) + } + + state := p.cloneState() + + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseNotExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExpr(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseOneOrMoreExpr")) + } + + var vals []interface{} + + for { + p.pushV() + val, ok := p.parseExpr(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")")) + } + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExpr(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseRuleRefExpr " + ref.name)) + } + + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := 
p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRule(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseSeqExpr")) + } + + vals := make([]interface{}, 0, len(seq.exprs)) + + pt := p.pt + state := p.cloneState() + for _, expr := range seq.exprs { + val, ok := p.parseExpr(expr) + if !ok { + p.restoreState(state) + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseStateCodeExpr")) + } + + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseThrowExpr")) + } + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExpr(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrMoreExpr")) + } + + var vals []interface{} + + for { + p.pushV() + val, ok := p.parseExpr(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrOneExpr")) + } + + p.pushV() + val, _ := p.parseExpr(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/parse/parse.go b/vendor/github.com/mmcloughlin/addchain/acc/parse/parse.go new file mode 100644 index 00000000000..8523e146970 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/parse/parse.go @@ -0,0 +1,35 @@ 
+// Package parse implements a parser for acc programs. +package parse + +import ( + "io" + "strings" + + "github.com/mmcloughlin/addchain/acc/ast" + "github.com/mmcloughlin/addchain/acc/parse/internal/parser" +) + +//go:generate pigeon -o internal/parser/zparser.go acc.peg + +// File parses filename. +func File(filename string) (*ast.Chain, error) { + return cast(parser.ParseFile(filename)) +} + +// Reader parses the data from r using filename as information in +// error messages. +func Reader(filename string, r io.Reader) (*ast.Chain, error) { + return cast(parser.ParseReader(filename, r)) +} + +// String parses s. +func String(s string) (*ast.Chain, error) { + return Reader("string", strings.NewReader(s)) +} + +func cast(i interface{}, err error) (*ast.Chain, error) { + if err != nil { + return nil, err + } + return i.(*ast.Chain), nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/pass/alloc.go b/vendor/github.com/mmcloughlin/addchain/acc/pass/alloc.go new file mode 100644 index 00000000000..5c8e97d8232 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/pass/alloc.go @@ -0,0 +1,98 @@ +package pass + +import ( + "fmt" + + "github.com/mmcloughlin/addchain/acc/ir" + "github.com/mmcloughlin/addchain/internal/container/heap" + "github.com/mmcloughlin/addchain/internal/errutil" +) + +// Allocator pass assigns a minimal number of temporary variables to execute a program. +type Allocator struct { + // Input is the name of the input variable. Note this is index 0, or the + // identity element of the addition chain. + Input string + + // Output is the name to give to the final output of the addition chain. This + // variable may itself be used as a temporary during execution. + Output string + + // Format defines how to format any temporary variables. This format string + // must accept one integer value. For example "t%d" would be a reasonable + // choice. + Format string +} + +// Execute performs temporary variable allocation. 
+func (a Allocator) Execute(p *ir.Program) error { + // Canonicalize operands and delete all names. + if err := Exec(p, Func(CanonicalizeOperands), Func(ClearNames)); err != nil { + return err + } + + // Initialize allocation. This maps operand index to variable index. The + // inidicies 0 and 1 are special, reserved for the input and output + // respectively. Any indicies above that are temporaries. + out := p.Output() + allocation := map[int]int{ + 0: 0, + out.Index: 1, + } + n := 2 + + // Keep a heap of available indicies. Initially none. + available := heap.NewMinInts() + + // Process instructions in reverse. + for i := len(p.Instructions) - 1; i >= 0; i-- { + inst := p.Instructions[i] + + // The output operand variable now becomes available. + v, ok := allocation[inst.Output.Index] + if !ok { + return errutil.AssertionFailure("output operand %d missing allocation", inst.Output.Index) + } + available.Push(v) + + // Inputs may need variables, if they are not already live. + for _, input := range inst.Op.Inputs() { + _, ok := allocation[input.Index] + if ok { + continue + } + + // If there's nothing available, we'll need one more temporary. + if available.Empty() { + available.Push(n) + n++ + } + + allocation[input.Index] = available.Pop() + } + } + + // Record allocation. 
+ for _, op := range p.Operands { + op.Identifier = a.name(allocation[op.Index]) + } + + temps := []string{} + for i := 2; i < n; i++ { + temps = append(temps, a.name(i)) + } + p.Temporaries = temps + + return nil +} + +func (a Allocator) name(v int) string { + switch v { + case 0: + return a.Input + case 1: + return a.Output + default: + return fmt.Sprintf(a.Format, v-2) + } +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/pass/eval.go b/vendor/github.com/mmcloughlin/addchain/acc/pass/eval.go new file mode 100644 index 00000000000..2dd85b1dd73 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/pass/eval.go @@ -0,0 +1,57 @@ +package pass + +import ( + "errors" + + "github.com/mmcloughlin/addchain" + "github.com/mmcloughlin/addchain/acc/ir" + "github.com/mmcloughlin/addchain/internal/errutil" +) + +// Compile generates the fully unrolled sequence of additions. The result is +// stored in the Program field. +func Compile(p *ir.Program) error { + if p.Program != nil { + return nil + } + + p.Program = addchain.Program{} + for _, i := range p.Instructions { + var out int + var err error + + switch op := i.Op.(type) { + case ir.Add: + out, err = p.Program.Add(op.X.Index, op.Y.Index) + case ir.Double: + out, err = p.Program.Double(op.X.Index) + case ir.Shift: + out, err = p.Program.Shift(op.X.Index, op.S) + default: + return errutil.UnexpectedType(op) + } + + if err != nil { + return err + } + if out != i.Output.Index { + return errors.New("incorrect output index") + } + } + + return nil +} + +// Eval evaluates the program and places the result in the Chain field. 
+func Eval(p *ir.Program) error { + if p.Chain != nil { + return nil + } + + if err := Compile(p); err != nil { + return err + } + + p.Chain = p.Program.Evaluate() + return nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/pass/naming.go b/vendor/github.com/mmcloughlin/addchain/acc/pass/naming.go new file mode 100644 index 00000000000..e417ef1cf45 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/pass/naming.go @@ -0,0 +1,89 @@ +package pass + +import ( + "fmt" + "math/big" + + "github.com/mmcloughlin/addchain/acc/ir" + "github.com/mmcloughlin/addchain/internal/bigint" + "github.com/mmcloughlin/addchain/internal/errutil" +) + +// References: +// +// [curvechains] Brian Smith. The Most Efficient Known Addition Chains for Field Element and +// Scalar Inversion for the Most Popular and Most Unpopular Elliptic Curves. 2017. +// https://briansmith.org/ecc-inversion-addition-chains-01 (accessed June 30, 2019) + +// Naming conventions described in [curvechains]. +var ( + NameByteValues = NameBinaryValues(8, "_%b") + NameXRuns = NameBinaryRuns("x%d") +) + +// ClearNames deletes all operand names. +func ClearNames(p *ir.Program) error { + if err := CanonicalizeOperands(p); err != nil { + return err + } + + for _, operand := range p.Operands { + operand.Identifier = "" + } + + return nil +} + +// NameBinaryValues assigns variable names to operands with values less than 2ᵏ. +// The identifier is determined from the format string, which should expect to +// take one *big.Int argument. +func NameBinaryValues(k int, format string) Interface { + return NameOperands(func(_ int, x *big.Int) string { + if x.BitLen() > k { + return "" + } + return fmt.Sprintf(format, x) + }) +} + +// NameBinaryRuns assigns variable names to operands with values of the form 2ⁿ +// - 1. The identifier is determined from the format string, which takes the +// length of the run as a parameter. 
+func NameBinaryRuns(format string) Interface { + return NameOperands(func(_ int, x *big.Int) string { + n := uint(x.BitLen()) + if !bigint.Equal(x, bigint.Ones(n)) { + return "" + } + return fmt.Sprintf(format, n) + }) +} + +// NameOperands builds a pass that names operands according to the given scheme. +func NameOperands(name func(int, *big.Int) string) Interface { + return Func(func(p *ir.Program) error { + // We need canonical operands, and we need to know the chain values. + if err := Exec(p, Func(CanonicalizeOperands), Func(Eval)); err != nil { + return err + } + + for _, operand := range p.Operands { + // Skip if it already has a name. + if operand.Identifier != "" { + continue + } + + // Fetch referenced value. + idx := operand.Index + if idx >= len(p.Chain) { + return errutil.AssertionFailure("operand index %d out of bounds", idx) + } + x := p.Chain[idx] + + // Set name. + operand.Identifier = name(idx, x) + } + + return nil + }) +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/pass/pass.go b/vendor/github.com/mmcloughlin/addchain/acc/pass/pass.go new file mode 100644 index 00000000000..a9f1ee23d09 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/pass/pass.go @@ -0,0 +1,117 @@ +// Package pass implements analysis and processing passes on acc programs. +package pass + +import ( + "fmt" + + "github.com/mmcloughlin/addchain/acc/ir" + "github.com/mmcloughlin/addchain/internal/errutil" +) + +// Interface for a processing pass. +type Interface interface { + Execute(*ir.Program) error +} + +// Func adapts a function to the pass Interface. +type Func func(*ir.Program) error + +// Execute calls p. +func (f Func) Execute(p *ir.Program) error { + return f(p) +} + +// Concat returns a pass that executes the given passes in order, stopping on +// the first error. 
+func Concat(passes ...Interface) Interface { + return Func(func(p *ir.Program) error { + for _, pass := range passes { + if err := pass.Execute(p); err != nil { + return err + } + } + return nil + }) +} + +// Exec is a convenience for executing a list of passes on p. +func Exec(p *ir.Program, passes ...Interface) error { + return Concat(passes...).Execute(p) +} + +// CanonicalizeOperands ensures there is only one Operand object for each +// operand index in the program. In particular, this ensures there are not +// conflicting names for the same index. Populates the Operands field of the +// program. +func CanonicalizeOperands(p *ir.Program) error { + if p.Operands != nil { + return nil + } + + p.Operands = map[int]*ir.Operand{} + + // First pass through determines canonical operand for each index. + for _, i := range p.Instructions { + for _, operand := range i.Operands() { + // Look for an existing operand object for this index. + existing, found := p.Operands[operand.Index] + if !found { + p.Operands[operand.Index] = operand + continue + } + + if existing == operand { + continue + } + + // They're different objects. Check for a name conflict. + if existing.Identifier != "" && operand.Identifier != "" && existing.Identifier != operand.Identifier { + return fmt.Errorf("identifier conflict: index %d named %q and %q", operand.Index, operand.Identifier, existing.Identifier) + } + + if operand.Identifier != "" { + existing.Identifier = operand.Identifier + } + } + } + + // Second pass through replaces all operands with the canonical version. 
+ for _, i := range p.Instructions { + switch op := i.Op.(type) { + case ir.Add: + i.Op = ir.Add{ + X: p.Operands[op.X.Index], + Y: p.Operands[op.Y.Index], + } + case ir.Double: + i.Op = ir.Double{ + X: p.Operands[op.X.Index], + } + case ir.Shift: + i.Op = ir.Shift{ + X: p.Operands[op.X.Index], + S: op.S, + } + default: + return errutil.UnexpectedType(op) + } + } + + return nil +} + +// ReadCounts computes how many times each index is read in the program. This +// populates the ReadCount field of the program. +func ReadCounts(p *ir.Program) error { + if p.ReadCount != nil { + return nil + } + + p.ReadCount = map[int]int{} + for _, i := range p.Instructions { + for _, input := range i.Op.Inputs() { + p.ReadCount[input.Index]++ + } + } + return nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/pass/validation.go b/vendor/github.com/mmcloughlin/addchain/acc/pass/validation.go new file mode 100644 index 00000000000..e93ba63c34d --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/pass/validation.go @@ -0,0 +1,28 @@ +package pass + +import ( + "fmt" + + "github.com/mmcloughlin/addchain/acc/ir" +) + +// Validate is a pass to sanity check an intermediate representation program. +var Validate = Func(CheckDanglingInputs) + +// CheckDanglingInputs looks for program inputs that have no instruction +// outputting them. Note this can happen and still be technically correct. For +// example a shift instruction produces many intermediate results, and one of +// these can later be referenced. The resulting program is still correct, but +// undesirable. 
+func CheckDanglingInputs(p *ir.Program) error { + outputset := map[int]bool{0: true} + for _, i := range p.Instructions { + for _, input := range i.Op.Inputs() { + if !outputset[input.Index] { + return fmt.Errorf("no output instruction for input index %d", input.Index) + } + } + outputset[i.Output.Index] = true + } + return nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/printer/printer.go b/vendor/github.com/mmcloughlin/addchain/acc/printer/printer.go new file mode 100644 index 00000000000..96e8baa9f02 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/printer/printer.go @@ -0,0 +1,131 @@ +// Package printer implements printing of acc AST nodes. +package printer + +import ( + "bytes" + "io" + "os" + + "github.com/mmcloughlin/addchain/acc/ast" + "github.com/mmcloughlin/addchain/internal/errutil" + "github.com/mmcloughlin/addchain/internal/print" +) + +// String prints the AST and returns resulting string. +func String(n interface{}) (string, error) { + b, err := Bytes(n) + if err != nil { + return "", err + } + return string(b), nil +} + +// Bytes prints the AST and returns resulting bytes. +func Bytes(n interface{}) ([]byte, error) { + var buf bytes.Buffer + if err := Fprint(&buf, n); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// Print an AST node to standard out. +func Print(n interface{}) error { + return Fprint(os.Stdout, n) +} + +// Fprint writes the AST node n to w. 
+func Fprint(w io.Writer, n interface{}) error { + p := newprinter(w) + p.node(n) + p.Flush() + return p.Error() +} + +type printer struct { + *print.TabWriter +} + +func newprinter(w io.Writer) *printer { + return &printer{ + TabWriter: print.NewTabWriter(w, 1, 4, 1, ' ', 0), + } +} + +func (p *printer) node(n interface{}) { + switch n := n.(type) { + case *ast.Chain: + for _, stmt := range n.Statements { + p.statement(stmt) + } + case ast.Statement: + p.statement(n) + case ast.Expr: + p.expr(n, nil) + default: + p.SetError(errutil.UnexpectedType(n)) + } +} + +func (p *printer) statement(stmt ast.Statement) { + if len(stmt.Name) > 0 { + p.Printf("%s\t=\t", stmt.Name) + } else { + p.Printf("return\t\t") + } + p.expr(stmt.Expr, nil) + p.NL() +} + +func (p *printer) expr(e, parent ast.Expr) { + // Parens required if the precence of this operator is less than its parent. + if parent != nil && e.Precedence() < parent.Precedence() { + p.Printf("(") + p.expr(e, nil) + p.Printf(")") + return + } + + switch e := e.(type) { + case ast.Operand: + p.operand(e) + case ast.Identifier: + p.identifier(e) + case ast.Add: + p.add(e) + case ast.Double: + p.double(e) + case ast.Shift: + p.shift(e) + default: + p.SetError(errutil.UnexpectedType(e)) + } +} + +func (p *printer) add(a ast.Add) { + p.expr(a.X, a) + p.Printf(" + ") + p.expr(a.Y, a) +} + +func (p *printer) double(d ast.Double) { + p.Printf("2*") + p.expr(d.X, d) +} + +func (p *printer) shift(s ast.Shift) { + p.expr(s.X, s) + p.Printf(" << %d", s.S) +} + +func (p *printer) identifier(name ast.Identifier) { + p.Printf("%s", name) +} + +func (p *printer) operand(op ast.Operand) { + if op == 0 { + p.Printf("1") + } else { + p.Printf("[%d]", op) + } +} diff --git a/vendor/github.com/mmcloughlin/addchain/acc/translate.go b/vendor/github.com/mmcloughlin/addchain/acc/translate.go new file mode 100644 index 00000000000..91bfcf17e46 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/acc/translate.go @@ -0,0 +1,139 @@ +package 
acc + +import ( + "fmt" + + "github.com/mmcloughlin/addchain/acc/ast" + "github.com/mmcloughlin/addchain/acc/ir" + "github.com/mmcloughlin/addchain/internal/errutil" +) + +// Translate converts an abstract syntax tree to an intermediate representation. +func Translate(c *ast.Chain) (*ir.Program, error) { + s := newstate() + for _, stmt := range c.Statements { + if err := s.statement(stmt); err != nil { + return nil, err + } + } + return s.prog, nil +} + +type state struct { + prog *ir.Program + n int + variable map[ast.Identifier]*ir.Operand +} + +func newstate() *state { + return &state{ + prog: &ir.Program{}, + n: 1, + variable: map[ast.Identifier]*ir.Operand{}, + } +} + +func (s *state) statement(stmt ast.Statement) error { + out, err := s.expr(stmt.Expr) + if err != nil { + return err + } + if err := s.define(stmt.Name, out); err != nil { + return err + } + return nil +} + +func (s *state) expr(expr ast.Expr) (*ir.Operand, error) { + switch e := expr.(type) { + case ast.Operand: + return &ir.Operand{Index: int(e)}, nil + case ast.Identifier: + return s.lookup(e) + case ast.Add: + return s.add(e) + case ast.Double: + return s.double(e) + case ast.Shift: + return s.shift(e) + default: + return nil, errutil.UnexpectedType(e) + } +} + +func (s *state) add(a ast.Add) (*ir.Operand, error) { + x, err := s.expr(a.X) + if err != nil { + return nil, err + } + + y, err := s.expr(a.Y) + if err != nil { + return nil, err + } + + if x.Index > y.Index { + x, y = y, x + } + + out := ir.Index(s.n) + inst := &ir.Instruction{ + Output: out, + Op: ir.Add{X: x, Y: y}, + } + s.prog.AddInstruction(inst) + s.n++ + + return out, nil +} + +func (s *state) double(d ast.Double) (*ir.Operand, error) { + x, err := s.expr(d.X) + if err != nil { + return nil, err + } + + out := ir.Index(s.n) + inst := &ir.Instruction{ + Output: out, + Op: ir.Double{X: x}, + } + s.prog.AddInstruction(inst) + s.n++ + + return out, nil +} + +func (s *state) shift(sh ast.Shift) (*ir.Operand, error) { + x, err := 
s.expr(sh.X) + if err != nil { + return nil, err + } + + s.n += int(sh.S) + out := ir.Index(s.n - 1) + inst := &ir.Instruction{ + Output: out, + Op: ir.Shift{X: x, S: sh.S}, + } + s.prog.AddInstruction(inst) + + return out, nil +} + +func (s *state) define(name ast.Identifier, op *ir.Operand) error { + if _, found := s.variable[name]; found { + return fmt.Errorf("cannot redefine %q", name) + } + op.Identifier = string(name) + s.variable[name] = op + return nil +} + +func (s state) lookup(name ast.Identifier) (*ir.Operand, error) { + operand, ok := s.variable[name] + if !ok { + return nil, fmt.Errorf("variable %q undefined", name) + } + return operand, nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/alg/alg.go b/vendor/github.com/mmcloughlin/addchain/alg/alg.go new file mode 100644 index 00000000000..0555dacb0d3 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/alg/alg.go @@ -0,0 +1,43 @@ +// Package alg provides base types for addition chain and addition sequence search algorithms. +package alg + +import ( + "math/big" + + "github.com/mmcloughlin/addchain" +) + +// ChainAlgorithm is a method of generating an addition chain for a target integer. +type ChainAlgorithm interface { + // FindChain generates an addition chain ending at target. + FindChain(target *big.Int) (addchain.Chain, error) + + // String returns a name for the algorithm. + String() string +} + +// SequenceAlgorithm is a method of generating an addition sequence for a set of +// target values. +type SequenceAlgorithm interface { + // FindSequence generates an addition chain containing every element of targets. + FindSequence(targets []*big.Int) (addchain.Chain, error) + + // String returns a name for the algorithm. + String() string +} + +// AsChainAlgorithm adapts a sequence algorithm to a chain algorithm. The +// resulting algorithm calls the sequence algorithm with a singleton list +// containing the target. 
+func AsChainAlgorithm(s SequenceAlgorithm) ChainAlgorithm { + return asChainAlgorithm{s} +} + +type asChainAlgorithm struct { + SequenceAlgorithm +} + +// FindChain calls FindSequence with a singleton list containing the target. +func (a asChainAlgorithm) FindChain(target *big.Int) (addchain.Chain, error) { + return a.FindSequence([]*big.Int{target}) +} diff --git a/vendor/github.com/mmcloughlin/addchain/alg/contfrac/contfrac.go b/vendor/github.com/mmcloughlin/addchain/alg/contfrac/contfrac.go new file mode 100644 index 00000000000..3c31a538356 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/alg/contfrac/contfrac.go @@ -0,0 +1,262 @@ +// Package contfrac implements addition sequence algorithms based on continued-fraction expansions. +package contfrac + +import ( + "fmt" + "math/big" + + "github.com/mmcloughlin/addchain" + "github.com/mmcloughlin/addchain/internal/bigint" + "github.com/mmcloughlin/addchain/internal/bigints" +) + +// References: +// +// [contfrac] F Bergeron, J Berstel, S Brlek and C Duboc. Addition chains using continued +// fractions. Journal of Algorithms. 1989. +// http://www-igm.univ-mlv.fr/~berstel/Articles/1989AdditionChainDuboc.pdf +// [efficientcompaddchain] Bergeron, F., Berstel, J. and Brlek, S. Efficient computation of addition +// chains. Journal de theorie des nombres de Bordeaux. 1994. +// http://www.numdam.org/item/JTNB_1994__6_1_21_0 +// [gencontfrac] Amadou Tall and Ali Yassin Sanghare. Efficient computation of +// addition-subtraction chains using generalized continued Fractions. Cryptology +// ePrint Archive, Report 2013/466. 2013. https://eprint.iacr.org/2013/466 +// [hehcc:exp] Christophe Doche. Exponentiation. Handbook of Elliptic and Hyperelliptic Curve +// Cryptography, chapter 9. 2006. +// http://koclab.cs.ucsb.edu/teaching/ecc/eccPapers/Doche-ch09.pdf + +// Strategy is a method of choosing the auxiliary integer k in the continued +// fraction method outlined in [efficientcompaddchain]. 
+type Strategy interface { + // K returns values of k to try given n. + K(n *big.Int) []*big.Int + + // Singleton returns whether every call to K will return one value of k. This + // determines whether the resulting continued fractions sequence algorithm will + // be logarithmic, and therefore suitable for large inputs. + Singleton() bool + + // String returns a name for the strategy. + String() string +} + +// Strategies lists all available continued fraction strategies. +var Strategies = []Strategy{ + BinaryStrategy{}, + CoBinaryStrategy{}, + DichotomicStrategy{}, + SqrtStrategy{}, + TotalStrategy{}, + DyadicStrategy{}, + FermatStrategy{}, +} + +// Algorithm uses the continued fractions method for finding an addition chain +// [contfrac] [efficientcompaddchain]. +type Algorithm struct { + strategy Strategy +} + +// NewAlgorithm builds a continued fractions addition sequence algorithm using +// the provided strategy for selecting the auziallary integer k. +func NewAlgorithm(s Strategy) Algorithm { + return Algorithm{ + strategy: s, + } +} + +func (a Algorithm) String() string { + return fmt.Sprintf("continued_fractions(%s)", a.strategy) +} + +// FindSequence applies the continued fractions method to build a chain +// containing targets. +func (a Algorithm) FindSequence(targets []*big.Int) (addchain.Chain, error) { + bigints.Sort(targets) + return a.chain(targets), nil +} + +func (a Algorithm) minchain(n *big.Int) addchain.Chain { + if bigint.IsPow2(n) { + return bigint.Pow2UpTo(n) + } + + if bigint.EqualInt64(n, 3) { + return bigints.Int64s(1, 2, 3) + } + + var min addchain.Chain + for _, k := range a.strategy.K(n) { + c := a.chain([]*big.Int{k, n}) + if min == nil || len(c) < len(min) { + min = c + } + } + + return min +} + +// chain produces a continued fraction chain for the given values. The slice ns +// must be in ascending order. 
+func (a Algorithm) chain(ns []*big.Int) addchain.Chain { + k := len(ns) + if k == 1 || ns[k-2].Cmp(bigint.One()) <= 0 { + return a.minchain(ns[k-1]) + } + + q, r := new(big.Int), new(big.Int) + q.DivMod(ns[k-1], ns[k-2], r) + + cq := a.minchain(q) + remaining := bigints.Clone(ns[:k-1]) + + if bigint.IsZero(r) { + return addchain.Product(a.chain(remaining), cq) + } + + remaining = bigints.InsertSortedUnique(remaining, r) + return addchain.Plus(addchain.Product(a.chain(remaining), cq), r) +} + +// BinaryStrategy implements the binary strategy, which just sets k = floor(n/2). See [efficientcompaddchain] page 26. +// Since this is a singleton strategy it gives rise to a logarithmic sequence algoirithm that may not be optimal. +type BinaryStrategy struct{} + +func (BinaryStrategy) String() string { return "binary" } + +// Singleton returns true, since the binary strategy returns a single proposal +// for k. +func (BinaryStrategy) Singleton() bool { return true } + +// K returns floor(n/2). +func (BinaryStrategy) K(n *big.Int) []*big.Int { + k := new(big.Int).Rsh(n, 1) + return []*big.Int{k} +} + +// CoBinaryStrategy implements the co-binary strategy, also referred to as the +// "modified-binary" strategy. See [efficientcompaddchain] page 26 or +// [gencontfrac] page 6. Since this is a singleton strategy it gives rise to a +// logarithmic sequence algorithm that may not be optimal. +type CoBinaryStrategy struct{} + +func (CoBinaryStrategy) String() string { return "co_binary" } + +// Singleton returns true, since the co-binary strategy returns a single +// proposal for k. +func (CoBinaryStrategy) Singleton() bool { return true } + +// K returns floor(n/2) when n is even, or floor((n+1)/2) when n is odd. +func (CoBinaryStrategy) K(n *big.Int) []*big.Int { + k := bigint.Clone(n) + if k.Bit(0) == 1 { + k.Add(k, bigint.One()) + } + k.Rsh(k, 1) + return []*big.Int{k} +} + +// TotalStrategy returns all possible values of k less than n. 
This will result +// in the optimal continued fraction chain at a complexity of O(n² log²(n)). +// Note that the optimal continued fraction chain is not necessarily the optimal +// chain. Must not be used for large inputs. +type TotalStrategy struct{} + +func (TotalStrategy) String() string { return "total" } + +// Singleton returns false, since the total strategy returns more than once k. +func (TotalStrategy) Singleton() bool { return false } + +// K returns {2,, 3, ..., n-1}. +func (TotalStrategy) K(n *big.Int) []*big.Int { + ks := []*big.Int{} + k := big.NewInt(2) + one := bigint.One() + for k.Cmp(n) < 0 { + ks = append(ks, bigint.Clone(k)) + k.Add(k, one) + } + return ks +} + +// DyadicStrategy implements the Dyadic Strategy, defined in +// [efficientcompaddchain] page 28. This gives rise to a sequence algorithm with +// complexity O(n*log³(n)). Must not be used for large inputs. +type DyadicStrategy struct{} + +func (DyadicStrategy) String() string { return "dyadic" } + +// Singleton returns false, since the dyadic strategy returns more than once k. +func (DyadicStrategy) Singleton() bool { return false } + +// K returns floor( n / 2ʲ ) for all j. +func (DyadicStrategy) K(n *big.Int) []*big.Int { + ks := []*big.Int{} + k := new(big.Int).Rsh(n, 1) + one := bigint.One() + for k.Cmp(one) > 0 { + ks = append(ks, bigint.Clone(k)) + k.Rsh(k, 1) + } + return ks +} + +// FermatStrategy implements Fermat's Strategy, defined in +// [efficientcompaddchain] page 28. This returns a set of possible k of size +// O(log(log(n))), giving rise to a faster algorithm than the Dyadic strategy. +// This has been shown to be near optimal for small inputs. Must not be used for +// large inputs. +type FermatStrategy struct{} + +func (FermatStrategy) String() string { return "fermat" } + +// Singleton returns false, since Fermat's strategy returns more than once k. +func (FermatStrategy) Singleton() bool { return false } + +// K returns floor( n / 2^(2^j) ) for all j. 
+func (FermatStrategy) K(n *big.Int) []*big.Int { + ks := []*big.Int{} + k := new(big.Int).Rsh(n, 1) + one := bigint.One() + s := uint(1) + for k.Cmp(one) > 0 { + ks = append(ks, bigint.Clone(k)) + k.Rsh(k, s) + s *= 2 + } + return ks +} + +// DichotomicStrategy is a singleton strategy, defined in +// [efficientcompaddchain] page 28. This gives rise to a logarithmic sequence +// algorithm, but the result is not necessarily optimal. +type DichotomicStrategy struct{} + +func (DichotomicStrategy) String() string { return "dichotomic" } + +// Singleton returns true, since the dichotomic strategy suggests just one k. +func (DichotomicStrategy) Singleton() bool { return true } + +// K returns only one suggestion for k, namely floor( n / 2ʰ ) where h = log2(n)/2. +func (DichotomicStrategy) K(n *big.Int) []*big.Int { + l := n.BitLen() + h := uint(l) / 2 + k := new(big.Int).Div(n, bigint.Pow2(h)) + return []*big.Int{k} +} + +// SqrtStrategy chooses k to be floor(sqrt(n)). See [gencontfrac] page 6. Since +// this is a singleton strategy, it gives rise to a logarithmic sequence +// algorithm that's not necessarily optimal. +type SqrtStrategy struct{} + +func (SqrtStrategy) String() string { return "sqrt" } + +// Singleton returns true, since the square root strategy suggests just one k. +func (SqrtStrategy) Singleton() bool { return false } + +// K returns floor(sqrt(n)). +func (SqrtStrategy) K(n *big.Int) []*big.Int { + sqrt := new(big.Int).Sqrt(n) + return []*big.Int{sqrt} +} diff --git a/vendor/github.com/mmcloughlin/addchain/alg/dict/dict.go b/vendor/github.com/mmcloughlin/addchain/alg/dict/dict.go new file mode 100644 index 00000000000..08a1fa0e0fe --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/alg/dict/dict.go @@ -0,0 +1,434 @@ +// Package dict implements dictionary and run-length addition chain algorithms. 
+package dict + +import ( + "errors" + "fmt" + "math/big" + "sort" + + "github.com/mmcloughlin/addchain" + "github.com/mmcloughlin/addchain/alg" + "github.com/mmcloughlin/addchain/internal/bigint" + "github.com/mmcloughlin/addchain/internal/bigints" + "github.com/mmcloughlin/addchain/internal/bigvector" +) + +// References: +// +// [braueraddsubchains] Martin Otto. Brauer addition-subtraction chains. PhD thesis, Universitat +// Paderborn. 2001. +// http://www.martin-otto.de/publications/docs/2001_MartinOtto_Diplom_BrauerAddition-SubtractionChains.pdf +// [genshortchains] Kunihiro, Noboru and Yamamoto, Hirosuke. New Methods for Generating Short +// Addition Chains. IEICE Transactions on Fundamentals of Electronics +// Communications and Computer Sciences. 2000. +// https://pdfs.semanticscholar.org/b398/d10faca35af9ce5a6026458b251fd0a5640c.pdf +// [hehcc:exp] Christophe Doche. Exponentiation. Handbook of Elliptic and Hyperelliptic Curve +// Cryptography, chapter 9. 2006. +// http://koclab.cs.ucsb.edu/teaching/ecc/eccPapers/Doche-ch09.pdf + +// Term represents the integer D * 2ᴱ. +type Term struct { + D *big.Int + E uint +} + +// Int converts the term to an integer. +func (t Term) Int() *big.Int { + return new(big.Int).Lsh(t.D, t.E) +} + +// Sum is the representation of an integer as a sum of dictionary terms. See +// [hehcc:exp] definition 9.34. +type Sum []Term + +// Int computes the dictionary sum as an integer. +func (s Sum) Int() *big.Int { + x := bigint.Zero() + for _, t := range s { + x.Add(x, t.Int()) + } + return x +} + +// SortByExponent sorts terms in ascending order of the exponent E. +func (s Sum) SortByExponent() { + sort.Slice(s, func(i, j int) bool { return s[i].E < s[j].E }) +} + +// Dictionary returns the distinct D values in the terms of this sum. The values +// are returned in ascending order. 
+func (s Sum) Dictionary() []*big.Int { + dict := make([]*big.Int, 0, len(s)) + for _, t := range s { + dict = append(dict, t.D) + } + bigints.Sort(dict) + return bigints.Unique(dict) +} + +// Decomposer is a method of breaking an integer into a dictionary sum. +type Decomposer interface { + Decompose(x *big.Int) Sum + String() string +} + +// FixedWindow breaks integers into k-bit windows. +type FixedWindow struct { + K uint // Window size. +} + +func (w FixedWindow) String() string { return fmt.Sprintf("fixed_window(%d)", w.K) } + +// Decompose represents x in terms of k-bit windows from left to right. +func (w FixedWindow) Decompose(x *big.Int) Sum { + sum := Sum{} + h := x.BitLen() + for h > 0 { + l := max(h-int(w.K), 0) + d := bigint.Extract(x, uint(l), uint(h)) + if bigint.IsNonZero(d) { + sum = append(sum, Term{D: d, E: uint(l)}) + } + h = l + } + sum.SortByExponent() + return sum +} + +// SlidingWindow breaks integers into k-bit windows, skipping runs of zeros +// where possible. See [hehcc:exp] section 9.1.3 or [braueraddsubchains] section +// 1.2.3. +type SlidingWindow struct { + K uint // Window size. +} + +func (w SlidingWindow) String() string { return fmt.Sprintf("sliding_window(%d)", w.K) } + +// Decompose represents x in base 2ᵏ. +func (w SlidingWindow) Decompose(x *big.Int) Sum { + sum := Sum{} + h := x.BitLen() - 1 + for h >= 0 { + // Find first 1. + for h >= 0 && x.Bit(h) == 0 { + h-- + } + + if h < 0 { + break + } + + // Look down k positions. + l := max(h-int(w.K)+1, 0) + + // Advance to the next 1. + for x.Bit(l) == 0 { + l++ + } + + sum = append(sum, Term{ + D: bigint.Extract(x, uint(l), uint(h+1)), + E: uint(l), + }) + + h = l - 1 + } + sum.SortByExponent() + return sum +} + +// RunLength decomposes integers in to runs of 1s up to a maximal length. See +// [genshortchains] Section 3.1. +type RunLength struct { + T uint // Maximal run length. Zero means no limit. 
+} + +func (r RunLength) String() string { return fmt.Sprintf("run_length(%d)", r.T) } + +// Decompose breaks x into runs of 1 bits. +func (r RunLength) Decompose(x *big.Int) Sum { + sum := Sum{} + i := x.BitLen() - 1 + for i >= 0 { + // Find first 1. + for i >= 0 && x.Bit(i) == 0 { + i-- + } + + if i < 0 { + break + } + + // Look for the end of the run. + s := i + for i >= 0 && x.Bit(i) == 1 && (r.T == 0 || uint(s-i) < r.T) { + i-- + } + + // We have a run from s to i+1. + sum = append(sum, Term{ + D: bigint.Ones(uint(s - i)), + E: uint(i + 1), + }) + } + sum.SortByExponent() + return sum +} + +// Hybrid is a mix of the sliding window and run length decomposition methods, +// similar to the "Hybrid Method" of [genshortchains] Section 3.3. +type Hybrid struct { + K uint // Window size. + T uint // Maximal run length. Zero means no limit. +} + +func (h Hybrid) String() string { return fmt.Sprintf("hybrid(%d,%d)", h.K, h.T) } + +// Decompose breaks x into k-bit sliding windows or runs of 1s up to length T. +func (h Hybrid) Decompose(x *big.Int) Sum { + sum := Sum{} + + // Clone since we'll be modifying it. + y := bigint.Clone(x) + + // Process runs of length at least K. + i := y.BitLen() - 1 + for i >= 0 { + // Find first 1. + for i >= 0 && y.Bit(i) == 0 { + i-- + } + + if i < 0 { + break + } + + // Look for the end of the run. + s := i + for i >= 0 && y.Bit(i) == 1 && (h.T == 0 || uint(s-i) < h.T) { + i-- + } + + // We have a run from s to i+1. Skip it if its short. + n := uint(s - i) + if n <= h.K { + continue + } + + // Add it to the sum and remove it from the integer. + sum = append(sum, Term{ + D: bigint.Ones(n), + E: uint(i + 1), + }) + + y.Xor(y, bigint.Mask(uint(i+1), uint(s+1))) + } + + // Process what remains with a sliding window. + w := SlidingWindow{K: h.K} + rem := w.Decompose(y) + + sum = append(sum, rem...) 
+	sum.SortByExponent()
+
+	return sum
+}
+
+// Algorithm implements a general dictionary-based chain construction algorithm,
+// as in [braueraddsubchains] Algorithm 1.26. This operates in three stages:
+// decompose the target into a sum of dictionary terms, use a sequence algorithm
+// to generate the dictionary, then construct the target from the dictionary
+// terms.
+type Algorithm struct {
+	decomp Decomposer
+	seqalg alg.SequenceAlgorithm
+}
+
+// NewAlgorithm builds a dictionary algorithm that breaks up integers using the
+// decomposer d and uses the sequence algorithm a to generate dictionary
+// entries.
+func NewAlgorithm(d Decomposer, a alg.SequenceAlgorithm) *Algorithm {
+	return &Algorithm{
+		decomp: d,
+		seqalg: a,
+	}
+}
+
+func (a Algorithm) String() string {
+	return fmt.Sprintf("dictionary(%s,%s)", a.decomp, a.seqalg)
+}
+
+// FindChain builds an addition chain producing n. This works by using the
+// configured Decomposer to represent n as a sum of dictionary terms, then
+// delegating to the SequenceAlgorithm to build a chain producing the
+// dictionary, and finally using the dictionary terms to construct n. See
+// [genshortchains] Section 2 for a full description.
+func (a Algorithm) FindChain(n *big.Int) (addchain.Chain, error) {
+	// Decompose the target.
+	sum := a.decomp.Decompose(n)
+	sum.SortByExponent()
+
+	// Extract dictionary.
+	dict := sum.Dictionary()
+
+	// Use the sequence algorithm to produce a chain for each element of the dictionary.
+	c, err := a.seqalg.FindSequence(dict)
+	if err != nil {
+		return nil, err
+	}
+
+	// Reduce.
+	sum, c, err = primitive(sum, c)
+	if err != nil {
+		return nil, err
+	}
+
+	// Build chain for n out of the dictionary.
+	dc := dictsumchain(sum)
+	c = append(c, dc...)
+	bigints.Sort(c)
+	c = addchain.Chain(bigints.Unique(c))
+
+	return c, nil
+}
+
+// dictsumchain builds a chain for the integer represented by sum, assuming that
+// all the terms of the sum are already present.
Therefore this is intended to +// be appended to a chain that already contains the dictionary terms. +func dictsumchain(sum Sum) addchain.Chain { + c := addchain.Chain{} + k := len(sum) - 1 + cur := bigint.Clone(sum[k].D) + for ; k > 0; k-- { + // Shift until the next exponent. + for i := sum[k].E; i > sum[k-1].E; i-- { + cur.Lsh(cur, 1) + c.AppendClone(cur) + } + + // Add in the dictionary term at this position. + cur.Add(cur, sum[k-1].D) + c.AppendClone(cur) + } + + for i := sum[0].E; i > 0; i-- { + cur.Lsh(cur, 1) + c.AppendClone(cur) + } + + return c +} + +// primitive removes terms from the dictionary that are only required once. +// +// The general structure of dictionary based algorithm is to decompose the +// target into a sum of dictionary terms, then create a chain for the +// dictionary, and then create the target from that. In a case where a +// dictionary term is only required once in the target, this can cause extra +// work. In such a case, we will spend operations on creating the dictionary +// term independently, and then later add it into the result. Since it is only +// needed once, we can effectively construct the dictionary term "on the fly" as +// we build up the final target. +// +// This function looks for such opportunities. If it finds them it will produce +// an alternative dictionary sum that replaces that term with a sum of smaller +// terms. +func primitive(sum Sum, c addchain.Chain) (Sum, addchain.Chain, error) { + // This optimization cannot apply if the sum has only one term. + if len(sum) == 1 { + return sum, c, nil + } + + n := len(c) + + // We'll need a mapping from chain elements to where they appear in the chain. + idx := map[string]int{} + for i, x := range c { + idx[x.String()] = i + } + + // Build program for the chain. + p, err := c.Program() + if err != nil { + return nil, nil, err + } + + // How many times is each index read during construction, and during its use in the dictionary chain. 
+ reads := p.ReadCounts() + + for _, t := range sum { + i := idx[t.D.String()] + reads[i]++ + } + + // Now, the primitive dictionary elements are those that are read at least twice, and their dependencies. + deps := p.Dependencies() + primitive := make([]bool, n) + + for i, numreads := range reads { + if numreads < 2 { + continue + } + primitive[i] = true + for _, j := range bigint.BitsSet(deps[i]) { + primitive[j] = true + } + } + + // Express every position in the chain as a linear combination of dictionary + // terms that are used more than once. + vc := []bigvector.Vector{bigvector.NewBasis(n, 0)} + for i, op := range p { + var next bigvector.Vector + if primitive[i+1] { + next = bigvector.NewBasis(n, i+1) + } else { + next = bigvector.Add(vc[op.I], vc[op.J]) + } + vc = append(vc, next) + } + + // Now express the target sum in terms that are used more than once. + v := bigvector.New(n) + for _, t := range sum { + i := idx[t.D.String()] + v = bigvector.Add(v, bigvector.Lsh(vc[i], t.E)) + } + + // Rebuild this into a dictionary sum. + out := Sum{} + for i := 0; i < v.Len(); i++ { + for _, e := range bigint.BitsSet(v.Idx(i)) { + out = append(out, Term{ + D: c[i], + E: uint(e), + }) + } + } + + out.SortByExponent() + + // We should have not changed the sum. + if !bigint.Equal(out.Int(), sum.Int()) { + return nil, nil, errors.New("reconstruction does not match") + } + + // Prune any elements of the chain that are used only once. + pruned := addchain.Chain{} + for i, x := range c { + if primitive[i] { + pruned = append(pruned, x) + } + } + + return out, pruned, nil +} + +// max returns the maximum of a and b. 
+func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/vendor/github.com/mmcloughlin/addchain/alg/dict/runs.go b/vendor/github.com/mmcloughlin/addchain/alg/dict/runs.go new file mode 100644 index 00000000000..5277be47643 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/alg/dict/runs.go @@ -0,0 +1,108 @@ +package dict + +import ( + "errors" + "fmt" + "math/big" + + "github.com/mmcloughlin/addchain" + "github.com/mmcloughlin/addchain/alg" + "github.com/mmcloughlin/addchain/internal/bigint" + "github.com/mmcloughlin/addchain/internal/bigints" +) + +// RunsAlgorithm is a custom variant of the dictionary approach that decomposes +// a target into runs of ones. It leverages the observation that building a +// dictionary consisting of runs of 1s of lengths l₁, l₂, ..., l_k can itself +// be reduced to first finding an addition chain for the run lengths. Then from +// this chain we can build a chain for the runs themselves. +type RunsAlgorithm struct { + seqalg alg.SequenceAlgorithm +} + +// NewRunsAlgorithm constructs a RunsAlgorithm using the given sequence +// algorithm to generate addition sequences for run lengths. Note that since run +// lengths are far smaller than the integers themselves, this sequence algorithm +// does not need to be able to handle large integers. +func NewRunsAlgorithm(a alg.SequenceAlgorithm) *RunsAlgorithm { + return &RunsAlgorithm{ + seqalg: a, + } +} + +func (a RunsAlgorithm) String() string { + return fmt.Sprintf("runs(%s)", a.seqalg) +} + +// FindChain uses the run lengths method to find a chain for n. +func (a RunsAlgorithm) FindChain(n *big.Int) (addchain.Chain, error) { + // Find the runs in n. + d := RunLength{T: 0} + sum := d.Decompose(n) + runs := sum.Dictionary() + + // Treat the run lengths themselves as a sequence to be solved. 
+ lengths := []*big.Int{} + for _, run := range runs { + length := int64(run.BitLen()) + lengths = append(lengths, big.NewInt(length)) + } + + // Delegate to the sequence algorithm for a solution. + lc, err := a.seqalg.FindSequence(lengths) + if err != nil { + return nil, err + } + + // Build a dictionary chain from this. + c, err := RunsChain(lc) + if err != nil { + return nil, err + } + + // Reduce. + sum, c, err = primitive(sum, c) + if err != nil { + return nil, err + } + + // Build chain for n out of the dictionary. + dc := dictsumchain(sum) + c = append(c, dc...) + bigints.Sort(c) + c = addchain.Chain(bigints.Unique(c)) + + return c, nil +} + +// RunsChain takes a chain for the run lengths and generates a chain for the +// runs themselves. That is, if the provided chain is l₁, l₂, ..., l_k then +// the result will contain r(l₁), r(l₂), ..., r(l_k) where r(n) = 2ⁿ - 1. +func RunsChain(lc addchain.Chain) (addchain.Chain, error) { + p, err := lc.Program() + if err != nil { + return nil, err + } + + c := addchain.New() + s := map[uint]uint{} // current largest shift of each run length + for _, op := range p { + a, b := bigint.MinMax(lc[op.I], lc[op.J]) + if !a.IsUint64() || !b.IsUint64() { + return nil, errors.New("values in lengths chain are far too large") + } + + la := uint(a.Uint64()) + lb := uint(b.Uint64()) + + rb := bigint.Ones(lb) + for ; s[lb] < la; s[lb]++ { + shift := new(big.Int).Lsh(rb, s[lb]+1) + c = append(c, shift) + } + + c = append(c, bigint.Ones(la+lb)) + } + + return c, nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/alg/ensemble/ensemble.go b/vendor/github.com/mmcloughlin/addchain/alg/ensemble/ensemble.go new file mode 100644 index 00000000000..f84a2626753 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/alg/ensemble/ensemble.go @@ -0,0 +1,71 @@ +// Package ensemble provides a collection of addition chain algorithms intended +// for target integers of cryptographic interest. 
+package ensemble + +import ( + "github.com/mmcloughlin/addchain/alg" + "github.com/mmcloughlin/addchain/alg/contfrac" + "github.com/mmcloughlin/addchain/alg/dict" + "github.com/mmcloughlin/addchain/alg/heuristic" + "github.com/mmcloughlin/addchain/alg/opt" +) + +// Ensemble is a convenience for building an ensemble of chain algorithms intended for large integers. +func Ensemble() []alg.ChainAlgorithm { + // Choose sequence algorithms. + seqalgs := []alg.SequenceAlgorithm{ + heuristic.NewAlgorithm(heuristic.UseFirst( + heuristic.Halving{}, + heuristic.DeltaLargest{}, + )), + heuristic.NewAlgorithm(heuristic.UseFirst( + heuristic.Halving{}, + heuristic.Approximation{}, + )), + } + + for _, strategy := range contfrac.Strategies { + if strategy.Singleton() { + seqalgs = append(seqalgs, contfrac.NewAlgorithm(strategy)) + } + } + + // Build decomposers. + decomposers := []dict.Decomposer{} + for k := uint(4); k <= 128; k *= 2 { + decomposers = append(decomposers, dict.SlidingWindow{K: k}) + } + + decomposers = append(decomposers, dict.RunLength{T: 0}) + for t := uint(16); t <= 128; t *= 2 { + decomposers = append(decomposers, dict.RunLength{T: t}) + } + + for k := uint(2); k <= 8; k++ { + decomposers = append(decomposers, dict.Hybrid{K: k, T: 0}) + for t := uint(16); t <= 64; t *= 2 { + decomposers = append(decomposers, dict.Hybrid{K: k, T: t}) + } + } + + // Build dictionary algorithms for every combination. + as := []alg.ChainAlgorithm{} + for _, decomp := range decomposers { + for _, seqalg := range seqalgs { + a := dict.NewAlgorithm(decomp, seqalg) + as = append(as, a) + } + } + + // Add the runs algorithms. + for _, seqalg := range seqalgs { + as = append(as, dict.NewRunsAlgorithm(seqalg)) + } + + // Wrap in an optimization layer. 
+ for i, a := range as { + as[i] = opt.Algorithm{Algorithm: a} + } + + return as +} diff --git a/vendor/github.com/mmcloughlin/addchain/alg/exec/exec.go b/vendor/github.com/mmcloughlin/addchain/alg/exec/exec.go new file mode 100644 index 00000000000..731aa29e176 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/alg/exec/exec.go @@ -0,0 +1,99 @@ +// Package exec implements addition chain algorithm execution. +package exec + +import ( + "errors" + "io/ioutil" + "log" + "math/big" + "runtime" + + "github.com/mmcloughlin/addchain" + "github.com/mmcloughlin/addchain/alg" + "github.com/mmcloughlin/addchain/internal/bigint" +) + +// Result from applying an algorithm to a target. +type Result struct { + Target *big.Int + Algorithm alg.ChainAlgorithm + Err error + Chain addchain.Chain + Program addchain.Program +} + +// Execute the algorithm on the target number n. +func Execute(n *big.Int, a alg.ChainAlgorithm) Result { + r := Result{ + Target: n, + Algorithm: a, + } + + r.Chain, r.Err = a.FindChain(n) + if r.Err != nil { + return r + } + + // Note this also performs validation. + r.Program, r.Err = r.Chain.Program() + if r.Err != nil { + return r + } + + // Still, verify that it produced what we wanted. + if !bigint.Equal(r.Chain.End(), n) { + r.Err = errors.New("did not produce the required value") + } + + return r +} + +// Parallel executes multiple algorithms in parallel. +type Parallel struct { + limit int + logger *log.Logger +} + +// NewParallel builds a new parallel executor. +func NewParallel() *Parallel { + return &Parallel{ + limit: runtime.NumCPU(), + logger: log.New(ioutil.Discard, "", 0), + } +} + +// SetConcurrency sets the number of algorithms that may be run in parallel. +func (p *Parallel) SetConcurrency(limit int) { + p.limit = limit +} + +// SetLogger sets logging output. +func (p *Parallel) SetLogger(l *log.Logger) { + p.logger = l +} + +// Execute all algorithms against the provided target. 
+func (p Parallel) Execute(n *big.Int, as []alg.ChainAlgorithm) []Result { + rs := make([]Result, len(as)) + + // Use buffered channel to limit concurrency. + type token struct{} + sem := make(chan token, p.limit) + + for i, a := range as { + sem <- token{} + go func(i int, a alg.ChainAlgorithm) { + p.logger.Printf("start: %s", a) + rs[i] = Execute(n, a) + p.logger.Printf("done: %s", a) + <-sem + }(i, a) + } + + // Wait for completion. + for i := 0; i < p.limit; i++ { + sem <- token{} + } + + return rs +} diff --git a/vendor/github.com/mmcloughlin/addchain/alg/heuristic/heuristic.go b/vendor/github.com/mmcloughlin/addchain/alg/heuristic/heuristic.go new file mode 100644 index 00000000000..94abb6400a6 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/alg/heuristic/heuristic.go @@ -0,0 +1,234 @@ +// Package heuristic implements heuristic-based addition sequence algorithms +// with the Bos-Coster Makesequence structure. +package heuristic + +import ( + "errors" + "fmt" + "math/big" + "strings" + + "github.com/mmcloughlin/addchain" + "github.com/mmcloughlin/addchain/internal/bigint" + "github.com/mmcloughlin/addchain/internal/bigints" +) + +// References: +// +// [boscoster] Bos, Jurjen and Coster, Matthijs. Addition Chain Heuristics. In Advances in +// Cryptology --- CRYPTO' 89 Proceedings, pages 400--407. 1990. +// https://link.springer.com/content/pdf/10.1007/0-387-34805-0_37.pdf +// [github:kwantam/addchain] Riad S. Wahby. kwantam/addchain. Github Repository. Apache License, Version 2.0. +// 2018. https://github.com/kwantam/addchain +// [hehcc:exp] Christophe Doche. Exponentiation. Handbook of Elliptic and Hyperelliptic Curve +// Cryptography, chapter 9. 2006. +// http://koclab.cs.ucsb.edu/teaching/ecc/eccPapers/Doche-ch09.pdf +// [modboscoster] Ayan Nandy. Modifications of Bos and Coster’s Heuristics in search of a +// shorter addition chain for faster exponentiation. Masters thesis, Indian +// Statistical Institute Kolkata. 2011. 
+// http://library.isical.ac.in:8080/jspui/bitstream/10263/6441/1/DISS-285.pdf +// [mpnt] F. L. Ţiplea, S. Iftene, C. Hriţcu, I. Goriac, R. Gordân and E. Erbiceanu. +// MpNT: A Multi-Precision Number Theory Package, Number Theoretical Algorithms +// (I). Technical Report TR03-02, Faculty of Computer Science, "Alexandru Ioan +// Cuza" University, Iasi. 2003. https://profs.info.uaic.ro/~tr/tr03-02.pdf +// [speedsubgroup] Stam, Martijn. Speeding up subgroup cryptosystems. PhD thesis, Technische +// Universiteit Eindhoven. 2003. https://cr.yp.to/bib/2003/stam-thesis.pdf + +// Heuristic suggests insertions given a current protosequence. +type Heuristic interface { + // Suggest insertions given a target and protosequence f. Protosequence must + // contain sorted distinct integers. + Suggest(f []*big.Int, target *big.Int) []*big.Int + + // String returns a name for the heuristic. + String() string +} + +// Algorithm searches for an addition sequence using a heuristic at each step. +// This implements the framework given in [mpnt], page 63, with the heuristic +// playing the role of the "newnumbers" function. +type Algorithm struct { + heuristic Heuristic +} + +// NewAlgorithm builds a heuristic algorithm. +func NewAlgorithm(h Heuristic) *Algorithm { + return &Algorithm{ + heuristic: h, + } +} + +func (h Algorithm) String() string { + return fmt.Sprintf("heuristic(%v)", h.heuristic) +} + +// FindSequence searches for an addition sequence for the given targets. +func (h Algorithm) FindSequence(targets []*big.Int) (addchain.Chain, error) { + // Skip the special case when targets is just {1}. + if len(targets) == 1 && bigint.EqualInt64(targets[0], 1) { + return targets, nil + } + + // Initialize protosequence. + leader := bigints.Int64s(1, 2) + proto := append(leader, targets...) + bigints.Sort(proto) + proto = bigints.Unique(proto) + c := []*big.Int{} + + for len(proto) > 2 { + // Pop the target element. 
+ top := len(proto) - 1 + target := proto[top] + proto = proto[:top] + c = bigints.InsertSortedUnique(c, target) + + // Apply heuristic. + insert := h.heuristic.Suggest(proto, target) + if insert == nil { + return nil, errors.New("failed to find sequence") + } + + // Update protosequence. + proto = bigints.MergeUnique(proto, insert) + } + + // Prepare the chain to return. + c = bigints.MergeUnique(leader, c) + + return addchain.Chain(c), nil +} + +// DeltaLargest implements the simple heuristic of adding the delta between the +// largest two entries in the protosequence. +type DeltaLargest struct{} + +func (DeltaLargest) String() string { return "delta_largest" } + +// Suggest proposes inserting target-max(f). +func (DeltaLargest) Suggest(f []*big.Int, target *big.Int) []*big.Int { + n := len(f) + delta := new(big.Int).Sub(target, f[n-1]) + if delta.Sign() <= 0 { + panic("delta must be positive") + } + return []*big.Int{delta} +} + +// Approximation is the "Approximation" heuristic from [boscoster]. +type Approximation struct{} + +func (Approximation) String() string { return "approximation" } + +// Suggest applies the "Approximation" heuristic. This heuristic looks for two +// elements a, b in the list that sum to something close to the target element +// f. That is, we look for f-(a+b) = epsilon where a ⩽ b and epsilon is a +// "small" positive value. +func (Approximation) Suggest(f []*big.Int, target *big.Int) []*big.Int { + delta := new(big.Int) + insert := new(big.Int) + mindelta := new(big.Int) + best := new(big.Int) + first := true + + // Leverage the fact that f contains sorted distinct integers to apply a + // linear algorithm, similar to the 2-SUM problem. Maintain left and right + // pointers and adjust them based on whether the sum is above or below the + // target. + for l, r := 0, len(f)-1; l <= r; { + a, b := f[l], f[r] + + // Compute the delta f-(a+b). 
+ delta.Add(a, b) + delta.Sub(target, delta) + if delta.Sign() < 0 { + // Sum exceeds target, decrement r for smaller b value. + r-- + continue + } + + // Proposed insertion is a+delta. + insert.Add(a, delta) + + // If it's actually in the sequence already, use it. + if bigints.ContainsSorted(insert, f) { + return []*big.Int{insert} + } + + // Keep it if its the closest we've seen. + if first || delta.Cmp(mindelta) < 0 { + mindelta.Set(delta) + best.Set(insert) + first = false + } + + // Advance to next a value. + l++ + } + + return []*big.Int{best} +} + +// Halving is the "Halving" heuristic from [boscoster]. +type Halving struct{} + +func (Halving) String() string { return "halving" } + +// Suggest applies when the target is at least twice as big as the next largest. +// If so it will return a sequence of doublings to insert. Otherwise it will +// return nil. +func (Halving) Suggest(f []*big.Int, target *big.Int) []*big.Int { + n := len(f) + max, next := target, f[n-1] + + // Check the condition f / f₁ ⩾ 2ᵘ + r := new(big.Int).Div(max, next) + if r.BitLen() < 2 { + return nil + } + u := r.BitLen() - 1 + + // Compute k = floor( f / 2ᵘ ). + k := new(big.Int).Rsh(max, uint(u)) + + // Proposal to insert: + // Delta d = f - k*2ᵘ + // Sequence k, 2*k, ..., k*2ᵘ + kshifts := []*big.Int{} + for e := 0; e <= u; e++ { + kshift := new(big.Int).Lsh(k, uint(e)) + kshifts = append(kshifts, kshift) + } + d := new(big.Int).Sub(max, kshifts[u]) + if bigint.IsZero(d) { + return kshifts[:u] + } + + return bigints.InsertSortedUnique(kshifts, d) +} + +// UseFirst builds a compositite heuristic that will make the first non-nil +// suggestion from the sub-heuristics. 
+func UseFirst(heuristics ...Heuristic) Heuristic { + return useFirst(heuristics) +} + +type useFirst []Heuristic + +func (h useFirst) String() string { + names := []string{} + for _, sub := range h { + names = append(names, sub.String()) + } + return "use_first(" + strings.Join(names, ",") + ")" +} + +// Suggest delegates to each sub-heuristic in turn and returns the first non-nil suggestion. +func (h useFirst) Suggest(f []*big.Int, target *big.Int) []*big.Int { + for _, heuristic := range h { + if insert := heuristic.Suggest(f, target); insert != nil { + return insert + } + } + return nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/alg/opt/opt.go b/vendor/github.com/mmcloughlin/addchain/alg/opt/opt.go new file mode 100644 index 00000000000..bf28cb9062a --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/alg/opt/opt.go @@ -0,0 +1,101 @@ +// Package opt implements generic optimizations that remove redundancy from addition chains. +package opt + +import ( + "fmt" + "math/big" + + "github.com/mmcloughlin/addchain" + "github.com/mmcloughlin/addchain/alg" +) + +// Algorithm applies chain optimization to the result of a wrapped algorithm. +type Algorithm struct { + Algorithm alg.ChainAlgorithm +} + +func (a Algorithm) String() string { + return fmt.Sprintf("opt(%s)", a.Algorithm) +} + +// FindChain delegates to the wrapped algorithm, then runs Optimize on the result. +func (a Algorithm) FindChain(n *big.Int) (addchain.Chain, error) { + c, err := a.Algorithm.FindChain(n) + if err != nil { + return nil, err + } + + opt, err := Optimize(c) + if err != nil { + return nil, err + } + + return opt, nil +} + +// Optimize aims to remove redundancy from an addition chain. +func Optimize(c addchain.Chain) (addchain.Chain, error) { + // Build program for c with all possible options at each step. 
+ ops := make([][]addchain.Op, len(c)) + for k := 1; k < len(c); k++ { + ops[k] = c.Ops(k) + } + + // Count how many times each index is used where it is the only available Op. + counts := make([]int, len(c)) + for k := 1; k < len(c); k++ { + if len(ops[k]) != 1 { + continue + } + for _, i := range ops[k][0].Operands() { + counts[i]++ + } + } + + // Now, try to remove the positions which are never the only available op. + remove := []int{} + for k := 1; k < len(c)-1; k++ { + if counts[k] > 0 { + continue + } + + // Prune places k is used. + for l := k + 1; l < len(c); l++ { + ops[l] = pruneuses(ops[l], k) + + // If this list now only has one element, the operands in it are now + // indispensable. + if len(ops[l]) == 1 { + for _, i := range ops[l][0].Operands() { + counts[i]++ + } + } + } + + // Mark k for deletion. + remove = append(remove, k) + } + + // Perform removals. + pruned := addchain.Chain{} + for i, x := range c { + if len(remove) > 0 && remove[0] == i { + remove = remove[1:] + continue + } + pruned = append(pruned, x) + } + + return pruned, nil +} + +// pruneuses removes any uses of i from the list of operations. +func pruneuses(ops []addchain.Op, i int) []addchain.Op { + filtered := ops[:0] + for _, op := range ops { + if !op.Uses(i) { + filtered = append(filtered, op) + } + } + return filtered +} diff --git a/vendor/github.com/mmcloughlin/addchain/chain.go b/vendor/github.com/mmcloughlin/addchain/chain.go new file mode 100644 index 00000000000..3d41657ebc0 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/chain.go @@ -0,0 +1,194 @@ +// Package addchain provides addition chain types and operations on them. +package addchain + +import ( + "errors" + "fmt" + "math/big" + + "github.com/mmcloughlin/addchain/internal/bigint" + "github.com/mmcloughlin/addchain/internal/bigints" +) + +// References: +// +// [efficientcompaddchain] Bergeron, F., Berstel, J. and Brlek, S. Efficient computation of addition +// chains. 
Journal de theorie des nombres de Bordeaux. 1994. +// http://www.numdam.org/item/JTNB_1994__6_1_21_0 +// [knuth] Knuth, Donald E. Evaluation of Powers. The Art of Computer Programming, Volume 2 +// (Third Edition): Seminumerical Algorithms, chapter 4.6.3. 1997. +// https://www-cs-faculty.stanford.edu/~knuth/taocp.html + +// Chain is an addition chain. +type Chain []*big.Int + +// New constructs the minimal chain {1}. +func New() Chain { + return Chain{big.NewInt(1)} +} + +// Int64s builds a chain from the given int64 values. +func Int64s(xs ...int64) Chain { + return Chain(bigints.Int64s(xs...)) +} + +// Clone the chain. +func (c Chain) Clone() Chain { + return bigints.Clone(c) +} + +// AppendClone appends a copy of x to c. +func (c *Chain) AppendClone(x *big.Int) { + *c = append(*c, bigint.Clone(x)) +} + +// End returns the last element of the chain. +func (c Chain) End() *big.Int { + return c[len(c)-1] +} + +// Ops returns all operations that produce the kth position. This could be empty +// for an invalid chain. +func (c Chain) Ops(k int) []Op { + ops := []Op{} + s := new(big.Int) + + // If the prefix is ascending this can be done in linear time. + if c[:k].IsAscending() { + for l, r := 0, k-1; l <= r; { + s.Add(c[l], c[r]) + cmp := s.Cmp(c[k]) + if cmp == 0 { + ops = append(ops, Op{l, r}) + } + if cmp <= 0 { + l++ + } else { + r-- + } + } + return ops + } + + // Fallback to quadratic. + for i := 0; i < k; i++ { + for j := i; j < k; j++ { + s.Add(c[i], c[j]) + if s.Cmp(c[k]) == 0 { + ops = append(ops, Op{i, j}) + } + } + } + + return ops +} + +// Op returns an Op that produces the kth position. +func (c Chain) Op(k int) (Op, error) { + ops := c.Ops(k) + if len(ops) == 0 { + return Op{}, fmt.Errorf("position %d is not the sum of previous entries", k) + } + return ops[0], nil +} + +// Program produces a program that generates the chain. +func (c Chain) Program() (Program, error) { + // Sanity checks. 
+	if len(c) == 0 {
+		return nil, errors.New("chain empty")
+	}
+
+	if c[0].Cmp(big.NewInt(1)) != 0 {
+		return nil, errors.New("chain must start with 1")
+	}
+
+	if bigints.Contains(bigint.Zero(), c) {
+		return nil, errors.New("chain contains zero")
+	}
+
+	for i := 0; i < len(c); i++ {
+		for j := i + 1; j < len(c); j++ {
+			if bigint.Equal(c[i], c[j]) {
+				return nil, fmt.Errorf("chain contains duplicate: %v at positions %d and %d", c[i], i, j)
+			}
+		}
+	}
+
+	// Produce the program.
+	p := Program{}
+	for k := 1; k < len(c); k++ {
+		op, err := c.Op(k)
+		if err != nil {
+			return nil, err
+		}
+		p = append(p, op)
+	}
+
+	return p, nil
+}
+
+// Validate checks that c is in fact an addition chain.
+func (c Chain) Validate() error {
+	_, err := c.Program()
+	return err
+}
+
+// Produces checks that c is a valid chain ending with target.
+func (c Chain) Produces(target *big.Int) error {
+	if err := c.Validate(); err != nil {
+		return err
+	}
+	if c.End().Cmp(target) != 0 {
+		return errors.New("chain does not end with target")
+	}
+	return nil
+}
+
+// Superset checks that c is a valid chain containing all the targets.
+func (c Chain) Superset(targets []*big.Int) error {
+	if err := c.Validate(); err != nil {
+		return err
+	}
+	for _, target := range targets {
+		if !bigints.Contains(target, c) {
+			return fmt.Errorf("chain does not contain %v", target)
+		}
+	}
+	return nil
+}
+
+// IsAscending reports whether the chain is ascending, that is if it's in sorted
+// order without repeats, as defined in [knuth] Section 4.6.3 formula (11).
+// Does not fully validate the chain, only that it is ascending.
+func (c Chain) IsAscending() bool {
+	if len(c) == 0 || !bigint.EqualInt64(c[0], 1) {
+		return false
+	}
+	for i := 1; i < len(c); i++ {
+		if c[i-1].Cmp(c[i]) >= 0 {
+			return false
+		}
+	}
+	return true
+}
+
+// Product computes the product of two addition chains. This is the "o times"
+// operator defined in [efficientcompaddchain] Section 2.
+func Product(a, b Chain) Chain { + c := a.Clone() + last := c.End() + for _, x := range b[1:] { + y := new(big.Int).Mul(last, x) + c = append(c, y) + } + return c +} + +// Plus adds x to the addition chain. This is the "o plus" operator defined in +// [efficientcompaddchain] Section 2. +func Plus(a Chain, x *big.Int) Chain { + c := a.Clone() + y := new(big.Int).Add(c.End(), x) + return append(c, y) +} diff --git a/vendor/github.com/mmcloughlin/addchain/codecov.yml b/vendor/github.com/mmcloughlin/addchain/codecov.yml new file mode 100644 index 00000000000..35cde5cd5e8 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/codecov.yml @@ -0,0 +1,4 @@ +coverage: + status: + project: off + patch: off diff --git a/vendor/github.com/mmcloughlin/addchain/go.mod b/vendor/github.com/mmcloughlin/addchain/go.mod new file mode 100644 index 00000000000..fc91d5d274d --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/go.mod @@ -0,0 +1,8 @@ +module github.com/mmcloughlin/addchain + +go 1.16 + +require ( + github.com/google/subcommands v1.2.0 + github.com/mmcloughlin/profile v0.1.1 +) diff --git a/vendor/github.com/mmcloughlin/addchain/go.sum b/vendor/github.com/mmcloughlin/addchain/go.sum new file mode 100644 index 00000000000..0111b2abf38 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/go.sum @@ -0,0 +1,4 @@ +github.com/google/subcommands v1.2.0 h1:vWQspBTo2nEqTUFita5/KeEWlUL8kQObDFbub/EN9oE= +github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= +github.com/mmcloughlin/profile v0.1.1 h1:jhDmAqPyebOsVDOCICJoINoLb/AnLBaUw58nFzxWS2w= +github.com/mmcloughlin/profile v0.1.1/go.mod h1:IhHD7q1ooxgwTgjxQYkACGA77oFTDdFVejUS1/tS/qU= diff --git a/vendor/github.com/mmcloughlin/addchain/install.sh b/vendor/github.com/mmcloughlin/addchain/install.sh new file mode 100644 index 00000000000..6ea84cbc9e9 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/install.sh @@ -0,0 +1,378 @@ +#!/bin/sh +set -e +# Code generated by 
godownloader. DO NOT EDIT. +# + +usage() { + this=$1 + cat </dev/null +} +echoerr() { + echo "$@" 1>&2 +} +log_prefix() { + echo "$0" +} +_logp=6 +log_set_priority() { + _logp="$1" +} +log_priority() { + if test -z "$1"; then + echo "$_logp" + return + fi + [ "$1" -le "$_logp" ] +} +log_tag() { + case $1 in + 0) echo "emerg" ;; + 1) echo "alert" ;; + 2) echo "crit" ;; + 3) echo "err" ;; + 4) echo "warning" ;; + 5) echo "notice" ;; + 6) echo "info" ;; + 7) echo "debug" ;; + *) echo "$1" ;; + esac +} +log_debug() { + log_priority 7 || return 0 + echoerr "$(log_prefix)" "$(log_tag 7)" "$@" +} +log_info() { + log_priority 6 || return 0 + echoerr "$(log_prefix)" "$(log_tag 6)" "$@" +} +log_err() { + log_priority 3 || return 0 + echoerr "$(log_prefix)" "$(log_tag 3)" "$@" +} +log_crit() { + log_priority 2 || return 0 + echoerr "$(log_prefix)" "$(log_tag 2)" "$@" +} +uname_os() { + os=$(uname -s | tr '[:upper:]' '[:lower:]') + case "$os" in + cygwin_nt*) os="windows" ;; + mingw*) os="windows" ;; + msys_nt*) os="windows" ;; + esac + echo "$os" +} +uname_arch() { + arch=$(uname -m) + case $arch in + x86_64) arch="amd64" ;; + x86) arch="386" ;; + i686) arch="386" ;; + i386) arch="386" ;; + aarch64) arch="arm64" ;; + armv5*) arch="armv5" ;; + armv6*) arch="armv6" ;; + armv7*) arch="armv7" ;; + esac + echo ${arch} +} +uname_os_check() { + os=$(uname_os) + case "$os" in + darwin) return 0 ;; + dragonfly) return 0 ;; + freebsd) return 0 ;; + linux) return 0 ;; + android) return 0 ;; + nacl) return 0 ;; + netbsd) return 0 ;; + openbsd) return 0 ;; + plan9) return 0 ;; + solaris) return 0 ;; + windows) return 0 ;; + esac + log_crit "uname_os_check '$(uname -s)' got converted to '$os' which is not a GOOS value. 
Please file bug at https://github.com/client9/shlib" + return 1 +} +uname_arch_check() { + arch=$(uname_arch) + case "$arch" in + 386) return 0 ;; + amd64) return 0 ;; + arm64) return 0 ;; + armv5) return 0 ;; + armv6) return 0 ;; + armv7) return 0 ;; + ppc64) return 0 ;; + ppc64le) return 0 ;; + mips) return 0 ;; + mipsle) return 0 ;; + mips64) return 0 ;; + mips64le) return 0 ;; + s390x) return 0 ;; + amd64p32) return 0 ;; + esac + log_crit "uname_arch_check '$(uname -m)' got converted to '$arch' which is not a GOARCH value. Please file bug report at https://github.com/client9/shlib" + return 1 +} +untar() { + tarball=$1 + case "${tarball}" in + *.tar.gz | *.tgz) tar --no-same-owner -xzf "${tarball}" ;; + *.tar) tar --no-same-owner -xf "${tarball}" ;; + *.zip) unzip "${tarball}" ;; + *) + log_err "untar unknown archive format for ${tarball}" + return 1 + ;; + esac +} +http_download_curl() { + local_file=$1 + source_url=$2 + header=$3 + if [ -z "$header" ]; then + code=$(curl -w '%{http_code}' -sL -o "$local_file" "$source_url") + else + code=$(curl -w '%{http_code}' -sL -H "$header" -o "$local_file" "$source_url") + fi + if [ "$code" != "200" ]; then + log_debug "http_download_curl received HTTP status $code" + return 1 + fi + return 0 +} +http_download_wget() { + local_file=$1 + source_url=$2 + header=$3 + if [ -z "$header" ]; then + wget -q -O "$local_file" "$source_url" + else + wget -q --header "$header" -O "$local_file" "$source_url" + fi +} +http_download() { + log_debug "http_download $2" + if is_command curl; then + http_download_curl "$@" + return + elif is_command wget; then + http_download_wget "$@" + return + fi + log_crit "http_download unable to find wget or curl" + return 1 +} +http_copy() { + tmp=$(mktemp) + http_download "${tmp}" "$1" "$2" || return 1 + body=$(cat "$tmp") + rm -f "${tmp}" + echo "$body" +} +github_release() { + owner_repo=$1 + version=$2 + test -z "$version" && version="latest" + 
giturl="https://github.com/${owner_repo}/releases/${version}" + json=$(http_copy "$giturl" "Accept:application/json") + test -z "$json" && return 1 + version=$(echo "$json" | tr -s '\n' ' ' | sed 's/.*"tag_name":"//' | sed 's/".*//') + test -z "$version" && return 1 + echo "$version" +} +hash_sha256() { + TARGET=${1:-/dev/stdin} + if is_command gsha256sum; then + hash=$(gsha256sum "$TARGET") || return 1 + echo "$hash" | cut -d ' ' -f 1 + elif is_command sha256sum; then + hash=$(sha256sum "$TARGET") || return 1 + echo "$hash" | cut -d ' ' -f 1 + elif is_command shasum; then + hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1 + echo "$hash" | cut -d ' ' -f 1 + elif is_command openssl; then + hash=$(openssl -dst openssl dgst -sha256 "$TARGET") || return 1 + echo "$hash" | cut -d ' ' -f a + else + log_crit "hash_sha256 unable to find command to compute sha-256 hash" + return 1 + fi +} +hash_sha256_verify() { + TARGET=$1 + checksums=$2 + if [ -z "$checksums" ]; then + log_err "hash_sha256_verify checksum file not specified in arg2" + return 1 + fi + BASENAME=${TARGET##*/} + want=$(grep "${BASENAME}" "${checksums}" 2>/dev/null | tr '\t' ' ' | cut -d ' ' -f 1) + if [ -z "$want" ]; then + log_err "hash_sha256_verify unable to find checksum for '${TARGET}' in '${checksums}'" + return 1 + fi + got=$(hash_sha256 "$TARGET") + if [ "$want" != "$got" ]; then + log_err "hash_sha256_verify checksum for '$TARGET' did not verify ${want} vs $got" + return 1 + fi +} +cat /dev/null <= 0 +} + +// ContainsSorted reports whether n is in xs, which is assumed to be sorted. +func ContainsSorted(n *big.Int, xs []*big.Int) bool { + i := sort.Search(len(xs), func(i int) bool { return xs[i].Cmp(n) >= 0 }) + return i < len(xs) && bigint.Equal(xs[i], n) +} + +// Clone a list of integers. +func Clone(xs []*big.Int) []*big.Int { + return append([]*big.Int{}, xs...) +} + +// Unique removes consecutive duplicates. 
+func Unique(xs []*big.Int) []*big.Int { + if len(xs) == 0 { + return []*big.Int{} + } + u := make([]*big.Int, 1, len(xs)) + u[0] = xs[0] + for _, x := range xs[1:] { + last := u[len(u)-1] + if !bigint.Equal(x, last) { + u = append(u, x) + } + } + return u +} + +// InsertSortedUnique inserts an integer into a slice of sorted distinct +// integers. +func InsertSortedUnique(xs []*big.Int, x *big.Int) []*big.Int { + return MergeUnique([]*big.Int{x}, xs) +} + +// MergeUnique merges two slices of sorted distinct integers. Elements in both +// slices are deduplicated. +func MergeUnique(xs, ys []*big.Int) []*big.Int { + r := make([]*big.Int, 0, len(xs)+len(ys)) + + for len(xs) > 0 && len(ys) > 0 { + switch xs[0].Cmp(ys[0]) { + case -1: + r = append(r, xs[0]) + xs = xs[1:] + case 0: + r = append(r, xs[0]) + xs = xs[1:] + ys = ys[1:] + case 1: + r = append(r, ys[0]) + ys = ys[1:] + } + } + + r = append(r, xs...) + r = append(r, ys...) + + return r +} diff --git a/vendor/github.com/mmcloughlin/addchain/internal/bigvector/bigvector.go b/vendor/github.com/mmcloughlin/addchain/internal/bigvector/bigvector.go new file mode 100644 index 00000000000..bac75bd5f8c --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/internal/bigvector/bigvector.go @@ -0,0 +1,86 @@ +// Package bigvector implements operations on vectors of immutable +// multi-precision integers. +package bigvector + +import ( + "math/big" + + "github.com/mmcloughlin/addchain/internal/bigint" +) + +// Vector of immutable multi-precision integers. +type Vector interface { + // Len returns vector length. + Len() int + + // Idx returns integer at index i. Returned integer must not be written to. + Idx(i int) *big.Int +} + +// New constructs an n-dimensional zero vector. 
+func New(n int) Vector { + return make(vector, n) +} + +type vector []big.Int + +func (v vector) Len() int { return len(v) } +func (v vector) Idx(i int) *big.Int { return &v[i] } + +// NewBasis constructs an n-dimensional basis vector with a 1 in position i. +func NewBasis(n, i int) Vector { + return basis{n: n, i: i} +} + +// Basis implementation saves allocations by returning pre-allocated zero and +// one integers based on the index requested. +var ( + zero = bigint.Zero() + one = bigint.One() +) + +type basis struct { + n int + i int +} + +func (b basis) Len() int { return b.n } + +func (b basis) Idx(i int) *big.Int { + switch { + case i >= b.n: + panic("bigvector: index out of range") + case i == b.i: + return one + default: + return zero + } +} + +// Add vectors. +func Add(u, v Vector) Vector { + assertsamelen(u, v) + n := u.Len() + w := make(vector, n) + for i := 0; i < n; i++ { + w[i].Add(u.Idx(i), v.Idx(i)) + } + return w +} + +// Lsh left shifts every element of the vector v. +func Lsh(v Vector, s uint) Vector { + n := v.Len() + w := make(vector, n) + for i := 0; i < n; i++ { + w[i].Lsh(v.Idx(i), s) + } + return w +} + +// assertsamelen panics if u and v are different lengths. +func assertsamelen(u, v Vector) { + if u.Len() != v.Len() { + panic("bigvector: length mismatch") + } +} diff --git a/vendor/github.com/mmcloughlin/addchain/internal/container/heap/heap.go b/vendor/github.com/mmcloughlin/addchain/internal/container/heap/heap.go new file mode 100644 index 00000000000..9d396f50e28 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/internal/container/heap/heap.go @@ -0,0 +1,55 @@ +// Package heap implements a heap on specific types. +package heap + +import "container/heap" + +// MinInts is a min-heap of integers. +type MinInts struct { + h *intheap +} + +// NewMinInts builds an empty integer min-heap. +func NewMinInts() *MinInts { + return &MinInts{ + h: &intheap{}, + } +} + +// Empty returns whether the heap is empty. 
+func (h *MinInts) Empty() bool { + return h.Len() == 0 +} + +// Len returns the number of elements in the heap. +func (h *MinInts) Len() int { + return h.h.Len() +} + +// Push x onto the heap. +func (h *MinInts) Push(x int) { + heap.Push(h.h, x) +} + +// Pop the min element from the heap. +func (h *MinInts) Pop() int { + return heap.Pop(h.h).(int) +} + +type intheap struct { + x []int +} + +func (h intheap) Len() int { return len(h.x) } +func (h intheap) Less(i, j int) bool { return h.x[i] < h.x[j] } +func (h intheap) Swap(i, j int) { h.x[i], h.x[j] = h.x[j], h.x[i] } + +func (h *intheap) Push(x interface{}) { + h.x = append(h.x, x.(int)) +} + +func (h *intheap) Pop() interface{} { + n := len(h.x) + x := h.x[n-1] + h.x = h.x[:n-1] + return x +} diff --git a/vendor/github.com/mmcloughlin/addchain/internal/errutil/errutil.go b/vendor/github.com/mmcloughlin/addchain/internal/errutil/errutil.go new file mode 100644 index 00000000000..364724118ce --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/internal/errutil/errutil.go @@ -0,0 +1,28 @@ +// Package errutil implements common error types and helper functions. +package errutil + +import ( + "fmt" + "io" +) + +// AssertionFailure is used for an error resulting from the failure of an +// expected invariant. +func AssertionFailure(format string, args ...interface{}) error { + return fmt.Errorf("assertion failure: "+format, args...) +} + +// UnexpectedType builds an error for an unexpected type, typically in a type switch. +func UnexpectedType(t interface{}) error { + return AssertionFailure("unexpected type %T", t) +} + +// CheckClose closes c. If an error occurs it will be written to the error +// pointer errp, if it doesn't already reference an error. This is intended to +// allow you to properly check errors when defering a close call. In this case +// the error pointer should be the address of a named error return. 
+func CheckClose(errp *error, c io.Closer) { + if err := c.Close(); err != nil && *errp == nil { + *errp = err + } +} diff --git a/vendor/github.com/mmcloughlin/addchain/internal/print/printer.go b/vendor/github.com/mmcloughlin/addchain/internal/print/printer.go new file mode 100644 index 00000000000..39bd4924977 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/internal/print/printer.go @@ -0,0 +1,104 @@ +// Package print provides helpers for structured output printing. +package print + +import ( + "fmt" + "io" + "strings" + "text/tabwriter" +) + +// DefaultIndent is the default string for one level of indentation. +const DefaultIndent = "\t" + +// Printer provides convenience methods for structured output printing. +// Specifically it stores any errors encountered so error checking does not have +// to be done on every print call. Also provides helpers for managing indentation. +type Printer struct { + out io.Writer + level int // current indentation level + indent string // indentation string + pending bool // if there's a pending indentation + err error // saved error from printing +} + +// New builds a printer writing to w. +func New(w io.Writer) Printer { + return Printer{ + out: w, + indent: DefaultIndent, + } +} + +// SetIndentString configures the string used for one level of indentation. +func (p *Printer) SetIndentString(indent string) { + p.indent = indent +} + +// Indent by one level. +func (p *Printer) Indent() { + p.level++ +} + +// Dedent by one level. +func (p *Printer) Dedent() { + p.level-- +} + +// Linef prints a formatted line. +func (p *Printer) Linef(format string, args ...interface{}) { + p.Printf(format, args...) + p.NL() +} + +// NL prints a newline. +func (p *Printer) NL() { + p.Printf("\n") + p.pending = true +} + +// Printf prints formatted output. 
+func (p *Printer) Printf(format string, args ...interface{}) { + if p.err != nil { + return + } + if p.pending { + indent := strings.Repeat(p.indent, p.level) + format = indent + format + p.pending = false + } + _, err := fmt.Fprintf(p.out, format, args...) + p.SetError(err) +} + +// Error returns the first error that occurred so far, if any. +func (p *Printer) Error() error { + return p.err +} + +// SetError records a possible error. +func (p *Printer) SetError(err error) { + if p.err == nil { + p.err = err + } +} + +// TabWriter provides tabwriter.Writer functionality with the Printer interface. +type TabWriter struct { + tw *tabwriter.Writer + Printer +} + +// NewTabWriter builds a TabWriter. Arguments are the same as for tabwriter.NewWriter. +func NewTabWriter(w io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *TabWriter { + tw := tabwriter.NewWriter(w, minwidth, tabwidth, padding, padchar, flags) + return &TabWriter{ + tw: tw, + Printer: New(tw), + } +} + +// Flush the tabwriter. +func (p *TabWriter) Flush() { + p.SetError(p.tw.Flush()) +} diff --git a/vendor/github.com/mmcloughlin/addchain/logo.svg b/vendor/github.com/mmcloughlin/addchain/logo.svg new file mode 100644 index 00000000000..0ab26295c04 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/logo.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/vendor/github.com/mmcloughlin/addchain/meta/cite.go b/vendor/github.com/mmcloughlin/addchain/meta/cite.go new file mode 100644 index 00000000000..bfaae04c759 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/meta/cite.go @@ -0,0 +1,61 @@ +package meta + +import ( + "bytes" + "errors" + "fmt" + "io" + "strconv" + "strings" + + "github.com/mmcloughlin/addchain/internal/print" +) + +// CheckCitable checks whether a citation can be generated for this built +// version. 
+func (p *Properties) CheckCitable() error { + if !p.IsRelease() { + return errors.New("cannot cite non-release version") + } + return nil +} + +// WriteCitation writes BibTeX citation for the most recent release to the given +// writer. +func (p *Properties) WriteCitation(w io.Writer) error { + // Determine release time. + date, err := p.ReleaseTime() + if err != nil { + return fmt.Errorf("release date: %w", err) + } + + // Use tabwriter for field alignment. + tw := print.NewTabWriter(w, 1, 4, 1, ' ', 0) + + field := func(key, value string) { tw.Linef(" %s\t=\t%s,", key, value) } + str := func(key, value string) { field(key, "{"+value+"}") } + + tw.Linef("@misc{%s,", p.Name) + str("title", p.Title()) + str("author", "Michael B. McLoughlin") + field("year", strconv.Itoa(date.Year())) + field("month", strings.ToLower(date.Month().String()[:3])) + str("howpublished", "Repository \\url{"+p.RepositoryURL()+"}") + str("version", p.ReleaseVersion) + str("license", "BSD 3-Clause License") + str("doi", p.DOI) + str("url", p.DOIURL()) + tw.Linef("}") + tw.Flush() + + return tw.Error() +} + +// Citation returns a BibTeX citation for the most recent release. +func (p *Properties) Citation() (string, error) { + buf := bytes.NewBuffer(nil) + if err := p.WriteCitation(buf); err != nil { + return "", err + } + return buf.String(), nil +} diff --git a/vendor/github.com/mmcloughlin/addchain/meta/meta.go b/vendor/github.com/mmcloughlin/addchain/meta/meta.go new file mode 100644 index 00000000000..da8b0e2a982 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/meta/meta.go @@ -0,0 +1,101 @@ +// Package meta defines properties about this project. +package meta + +import ( + "fmt" + "path" + "time" +) + +// VersionTagPrefix is the prefix used on Git tags corresponding to semantic +// version releases. +const VersionTagPrefix = "v" + +// Properties about this software package. +type Properties struct { + // Name is the project name. 
+ Name string + + // FullName is the "owner/name" identifier for the project. + FullName string + + // Description is the concise project headline. + Description string + + // BuildVersion is the version that was built. Typically populated at build + // time and will typically be empty for non-release builds. + BuildVersion string + + // ReleaseVersion is the version of the most recent release. + ReleaseVersion string + + // ReleaseDate is the date of the most recent release. (RFC3339 date format.) + ReleaseDate string + + // ConceptDOI is the DOI for all versions. + ConceptDOI string + + // DOI for the most recent release. + DOI string + + // ZenodoID is the Zenodo deposit ID for the most recent release. + ZenodoID string +} + +// Meta defines specific properties for the current version of this software. +var Meta = &Properties{ + Name: "addchain", + FullName: "mmcloughlin/addchain", + Description: "Cryptographic Addition Chain Generation in Go", + BuildVersion: buildversion, + ReleaseVersion: releaseversion, + ReleaseDate: releasedate, + ConceptDOI: conceptdoi, + DOI: doi, + ZenodoID: zenodoid, +} + +// Title is a full project title, suitable for a citation. +func (p *Properties) Title() string { + return fmt.Sprintf("%s: %s", p.Name, p.Description) +} + +// IsRelease reports whether the built version is a release. +func (p *Properties) IsRelease() bool { + return p.BuildVersion == p.ReleaseVersion +} + +// ReleaseTag returns the release tag corresponding to the most recent release. +func (p *Properties) ReleaseTag() string { + return VersionTagPrefix + p.ReleaseVersion +} + +// Module returns the Go module path. +func (p *Properties) Module() string { + return path.Join("github.com", p.FullName) +} + +// RepositoryURL returns a URL to the hosted repository. +func (p *Properties) RepositoryURL() string { + return "https://" + p.Module() +} + +// ReleaseURL returns the URL to the release page. 
+func (p *Properties) ReleaseURL() string { + return fmt.Sprintf("%s/releases/tag/%s", p.RepositoryURL(), p.ReleaseTag()) +} + +// ReleaseTime returns the release date as a time object. +func (p *Properties) ReleaseTime() (time.Time, error) { + return time.Parse("2006-01-02", p.ReleaseDate) +} + +// DOIURL returns the DOI URL corresponding to the most recent release. +func (p *Properties) DOIURL() string { return doiurl(p.DOI) } + +// ConceptDOIURL returns the DOI URL corresponding to the most recent release. +func (p *Properties) ConceptDOIURL() string { return doiurl(p.ConceptDOI) } + +func doiurl(doi string) string { + return "https://doi.org/" + doi +} diff --git a/vendor/github.com/mmcloughlin/addchain/meta/vars.go b/vendor/github.com/mmcloughlin/addchain/meta/vars.go new file mode 100644 index 00000000000..a034e8cd06c --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/meta/vars.go @@ -0,0 +1,10 @@ +package meta + +var ( + buildversion = "" + releaseversion = "0.4.0" + releasedate = "2021-10-30" + conceptdoi = "10.5281/zenodo.4625263" + doi = "10.5281/zenodo.5622943" + zenodoid = "5622943" +) diff --git a/vendor/github.com/mmcloughlin/addchain/program.go b/vendor/github.com/mmcloughlin/addchain/program.go new file mode 100644 index 00000000000..6449910b068 --- /dev/null +++ b/vendor/github.com/mmcloughlin/addchain/program.go @@ -0,0 +1,133 @@ +package addchain + +import ( + "fmt" + "math/big" + + "github.com/mmcloughlin/addchain/internal/bigint" +) + +// Op is an instruction to add positions I and J in a chain. +type Op struct{ I, J int } + +// IsDouble returns whether this operation is a doubling. +func (o Op) IsDouble() bool { return o.I == o.J } + +// Operands returns the indicies used in this operation. This will contain one +// or two entries depending on whether this is a doubling. 
+func (o Op) Operands() []int { + if o.IsDouble() { + return []int{o.I} + } + return []int{o.I, o.J} +} + +// Uses reports whether the given index is one of the operands. +func (o Op) Uses(i int) bool { + return o.I == i || o.J == i +} + +// Program is a sequence of operations. +type Program []Op + +// Shift appends a sequence of operations that bitwise shift index i left by s, +// equivalent to s double operations. Returns the index of the result. +func (p *Program) Shift(i int, s uint) (int, error) { + for ; s > 0; s-- { + next, err := p.Double(i) + if err != nil { + return 0, err + } + i = next + } + return i, nil +} + +// Double appends an operation that doubles index i. Returns the index of the +// result. +func (p *Program) Double(i int) (int, error) { + return p.Add(i, i) +} + +// Add appends an operation that adds indices i and j. Returns the index of the +// result. +func (p *Program) Add(i, j int) (int, error) { + if err := p.boundscheck(i); err != nil { + return 0, err + } + if err := p.boundscheck(j); err != nil { + return 0, err + } + *p = append(*p, Op{i, j}) + return len(*p), nil +} + +// boundscheck returns an error if i is out of bounds. +func (p Program) boundscheck(i int) error { + // Note the corresponding chain is one longer than the program. + n := len(p) + switch { + case i < 0: + return fmt.Errorf("negative index %d", i) + case i > n: + return fmt.Errorf("index %d out of bounds", i) + } + return nil +} + +// Doubles returns the number of doubles in the program. +func (p Program) Doubles() int { + doubles, _ := p.Count() + return doubles +} + +// Adds returns the number of adds in the program. +func (p Program) Adds() int { + _, adds := p.Count() + return adds +} + +// Count returns the number of doubles and adds in the program. +func (p Program) Count() (doubles, adds int) { + for _, op := range p { + if op.IsDouble() { + doubles++ + } else { + adds++ + } + } + return +} + +// Evaluate executes the program and returns the resulting chain. 
+func (p Program) Evaluate() Chain { + c := New() + for _, op := range p { + sum := new(big.Int).Add(c[op.I], c[op.J]) + c = append(c, sum) + } + return c +} + +// ReadCounts returns how many times each index is read in the program. +func (p Program) ReadCounts() []int { + reads := make([]int, len(p)+1) + for _, op := range p { + for _, i := range op.Operands() { + reads[i]++ + } + } + return reads +} + +// Dependencies returns an array of bitsets where each bitset contains the set +// of indicies that contributed to that position. +func (p Program) Dependencies() []*big.Int { + bitsets := []*big.Int{bigint.One()} + for i, op := range p { + bitset := new(big.Int).Or(bitsets[op.I], bitsets[op.J]) + bitset.SetBit(bitset, i+1, 1) + bitsets = append(bitsets, bitset) + } + return bitsets +} diff --git a/vendor/golang.org/x/sys/unix/ioctl_linux.go b/vendor/golang.org/x/sys/unix/ioctl_linux.go new file mode 100644 index 00000000000..48773f730ac --- /dev/null +++ b/vendor/golang.org/x/sys/unix/ioctl_linux.go @@ -0,0 +1,196 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unix + +import ( + "runtime" + "unsafe" +) + +// IoctlRetInt performs an ioctl operation specified by req on a device +// associated with opened file descriptor fd, and returns a non-negative +// integer that is returned by the ioctl syscall. 
+func IoctlRetInt(fd int, req uint) (int, error) { + ret, _, err := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), 0) + if err != 0 { + return 0, err + } + return int(ret), nil +} + +func IoctlGetUint32(fd int, req uint) (uint32, error) { + var value uint32 + err := ioctl(fd, req, uintptr(unsafe.Pointer(&value))) + return value, err +} + +func IoctlGetRTCTime(fd int) (*RTCTime, error) { + var value RTCTime + err := ioctl(fd, RTC_RD_TIME, uintptr(unsafe.Pointer(&value))) + return &value, err +} + +func IoctlSetRTCTime(fd int, value *RTCTime) error { + err := ioctl(fd, RTC_SET_TIME, uintptr(unsafe.Pointer(value))) + runtime.KeepAlive(value) + return err +} + +func IoctlGetRTCWkAlrm(fd int) (*RTCWkAlrm, error) { + var value RTCWkAlrm + err := ioctl(fd, RTC_WKALM_RD, uintptr(unsafe.Pointer(&value))) + return &value, err +} + +func IoctlSetRTCWkAlrm(fd int, value *RTCWkAlrm) error { + err := ioctl(fd, RTC_WKALM_SET, uintptr(unsafe.Pointer(value))) + runtime.KeepAlive(value) + return err +} + +type ifreqEthtool struct { + name [IFNAMSIZ]byte + data unsafe.Pointer +} + +// IoctlGetEthtoolDrvinfo fetches ethtool driver information for the network +// device specified by ifname. +func IoctlGetEthtoolDrvinfo(fd int, ifname string) (*EthtoolDrvinfo, error) { + // Leave room for terminating NULL byte. + if len(ifname) >= IFNAMSIZ { + return nil, EINVAL + } + + value := EthtoolDrvinfo{ + Cmd: ETHTOOL_GDRVINFO, + } + ifreq := ifreqEthtool{ + data: unsafe.Pointer(&value), + } + copy(ifreq.name[:], ifname) + err := ioctl(fd, SIOCETHTOOL, uintptr(unsafe.Pointer(&ifreq))) + runtime.KeepAlive(ifreq) + return &value, err +} + +// IoctlGetWatchdogInfo fetches information about a watchdog device from the +// Linux watchdog API. For more information, see: +// https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html. 
+func IoctlGetWatchdogInfo(fd int) (*WatchdogInfo, error) { + var value WatchdogInfo + err := ioctl(fd, WDIOC_GETSUPPORT, uintptr(unsafe.Pointer(&value))) + return &value, err +} + +// IoctlWatchdogKeepalive issues a keepalive ioctl to a watchdog device. For +// more information, see: +// https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html. +func IoctlWatchdogKeepalive(fd int) error { + return ioctl(fd, WDIOC_KEEPALIVE, 0) +} + +// IoctlFileCloneRange performs an FICLONERANGE ioctl operation to clone the +// range of data conveyed in value to the file associated with the file +// descriptor destFd. See the ioctl_ficlonerange(2) man page for details. +func IoctlFileCloneRange(destFd int, value *FileCloneRange) error { + err := ioctl(destFd, FICLONERANGE, uintptr(unsafe.Pointer(value))) + runtime.KeepAlive(value) + return err +} + +// IoctlFileClone performs an FICLONE ioctl operation to clone the entire file +// associated with the file description srcFd to the file associated with the +// file descriptor destFd. See the ioctl_ficlone(2) man page for details. +func IoctlFileClone(destFd, srcFd int) error { + return ioctl(destFd, FICLONE, uintptr(srcFd)) +} + +type FileDedupeRange struct { + Src_offset uint64 + Src_length uint64 + Reserved1 uint16 + Reserved2 uint32 + Info []FileDedupeRangeInfo +} + +type FileDedupeRangeInfo struct { + Dest_fd int64 + Dest_offset uint64 + Bytes_deduped uint64 + Status int32 + Reserved uint32 +} + +// IoctlFileDedupeRange performs an FIDEDUPERANGE ioctl operation to share the +// range of data conveyed in value from the file associated with the file +// descriptor srcFd to the value.Info destinations. See the +// ioctl_fideduperange(2) man page for details. 
+func IoctlFileDedupeRange(srcFd int, value *FileDedupeRange) error { + buf := make([]byte, SizeofRawFileDedupeRange+ + len(value.Info)*SizeofRawFileDedupeRangeInfo) + rawrange := (*RawFileDedupeRange)(unsafe.Pointer(&buf[0])) + rawrange.Src_offset = value.Src_offset + rawrange.Src_length = value.Src_length + rawrange.Dest_count = uint16(len(value.Info)) + rawrange.Reserved1 = value.Reserved1 + rawrange.Reserved2 = value.Reserved2 + + for i := range value.Info { + rawinfo := (*RawFileDedupeRangeInfo)(unsafe.Pointer( + uintptr(unsafe.Pointer(&buf[0])) + uintptr(SizeofRawFileDedupeRange) + + uintptr(i*SizeofRawFileDedupeRangeInfo))) + rawinfo.Dest_fd = value.Info[i].Dest_fd + rawinfo.Dest_offset = value.Info[i].Dest_offset + rawinfo.Bytes_deduped = value.Info[i].Bytes_deduped + rawinfo.Status = value.Info[i].Status + rawinfo.Reserved = value.Info[i].Reserved + } + + err := ioctl(srcFd, FIDEDUPERANGE, uintptr(unsafe.Pointer(&buf[0]))) + + // Output + for i := range value.Info { + rawinfo := (*RawFileDedupeRangeInfo)(unsafe.Pointer( + uintptr(unsafe.Pointer(&buf[0])) + uintptr(SizeofRawFileDedupeRange) + + uintptr(i*SizeofRawFileDedupeRangeInfo))) + value.Info[i].Dest_fd = rawinfo.Dest_fd + value.Info[i].Dest_offset = rawinfo.Dest_offset + value.Info[i].Bytes_deduped = rawinfo.Bytes_deduped + value.Info[i].Status = rawinfo.Status + value.Info[i].Reserved = rawinfo.Reserved + } + + return err +} + +func IoctlHIDGetDesc(fd int, value *HIDRawReportDescriptor) error { + err := ioctl(fd, HIDIOCGRDESC, uintptr(unsafe.Pointer(value))) + runtime.KeepAlive(value) + return err +} + +func IoctlHIDGetRawInfo(fd int) (*HIDRawDevInfo, error) { + var value HIDRawDevInfo + err := ioctl(fd, HIDIOCGRAWINFO, uintptr(unsafe.Pointer(&value))) + return &value, err +} + +func IoctlHIDGetRawName(fd int) (string, error) { + var value [_HIDIOCGRAWNAME_LEN]byte + err := ioctl(fd, _HIDIOCGRAWNAME, uintptr(unsafe.Pointer(&value[0]))) + return ByteSliceToString(value[:]), err +} + +func 
IoctlHIDGetRawPhys(fd int) (string, error) { + var value [_HIDIOCGRAWPHYS_LEN]byte + err := ioctl(fd, _HIDIOCGRAWPHYS, uintptr(unsafe.Pointer(&value[0]))) + return ByteSliceToString(value[:]), err +} + +func IoctlHIDGetRawUniq(fd int) (string, error) { + var value [_HIDIOCGRAWUNIQ_LEN]byte + err := ioctl(fd, _HIDIOCGRAWUNIQ, uintptr(unsafe.Pointer(&value[0]))) + return ByteSliceToString(value[:]), err +} diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index f2bc8631494..007358af8fc 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -405,10 +405,11 @@ includes_SunOS=' #include #include #include +#include #include -#include #include #include +#include ' @@ -499,10 +500,10 @@ ccflags="$@" $2 ~ /^LOCK_(SH|EX|NB|UN)$/ || $2 ~ /^LO_(KEY|NAME)_SIZE$/ || $2 ~ /^LOOP_(CLR|CTL|GET|SET)_/ || - $2 ~ /^(AF|SOCK|SO|SOL|IPPROTO|IP|IPV6|ICMP6|TCP|MCAST|EVFILT|NOTE|SHUT|PROT|MAP|MFD|T?PACKET|MSG|SCM|MCL|DT|MADV|PR|LOCAL)_/ || + $2 ~ /^(AF|SOCK|SO|SOL|IPPROTO|IP|IPV6|TCP|MCAST|EVFILT|NOTE|SHUT|PROT|MAP|MFD|T?PACKET|MSG|SCM|MCL|DT|MADV|PR|LOCAL)_/ || $2 ~ /^TP_STATUS_/ || $2 ~ /^FALLOC_/ || - $2 ~ /^ICMP(V6)?_FILTER/ || + $2 ~ /^ICMPV?6?_(FILTER|SEC)/ || $2 == "SOMAXCONN" || $2 == "NAME_MAX" || $2 == "IFNAMSIZ" || diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index 44ea96e39c6..4263953bee3 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -70,167 +70,7 @@ func Fchmodat(dirfd int, path string, mode uint32, flags int) (err error) { // ioctl itself should not be exposed directly, but additional get/set // functions for specific types are permissible. - -// IoctlRetInt performs an ioctl operation specified by req on a device -// associated with opened file descriptor fd, and returns a non-negative -// integer that is returned by the ioctl syscall. 
-func IoctlRetInt(fd int, req uint) (int, error) { - ret, _, err := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), 0) - if err != 0 { - return 0, err - } - return int(ret), nil -} - -func IoctlSetRTCTime(fd int, value *RTCTime) error { - err := ioctl(fd, RTC_SET_TIME, uintptr(unsafe.Pointer(value))) - runtime.KeepAlive(value) - return err -} - -func IoctlSetRTCWkAlrm(fd int, value *RTCWkAlrm) error { - err := ioctl(fd, RTC_WKALM_SET, uintptr(unsafe.Pointer(value))) - runtime.KeepAlive(value) - return err -} - -func IoctlGetUint32(fd int, req uint) (uint32, error) { - var value uint32 - err := ioctl(fd, req, uintptr(unsafe.Pointer(&value))) - return value, err -} - -func IoctlGetRTCTime(fd int) (*RTCTime, error) { - var value RTCTime - err := ioctl(fd, RTC_RD_TIME, uintptr(unsafe.Pointer(&value))) - return &value, err -} - -// IoctlGetWatchdogInfo fetches information about a watchdog device from the -// Linux watchdog API. For more information, see: -// https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html. -func IoctlGetWatchdogInfo(fd int) (*WatchdogInfo, error) { - var value WatchdogInfo - err := ioctl(fd, WDIOC_GETSUPPORT, uintptr(unsafe.Pointer(&value))) - return &value, err -} - -func IoctlGetRTCWkAlrm(fd int) (*RTCWkAlrm, error) { - var value RTCWkAlrm - err := ioctl(fd, RTC_WKALM_RD, uintptr(unsafe.Pointer(&value))) - return &value, err -} - -// IoctlFileCloneRange performs an FICLONERANGE ioctl operation to clone the -// range of data conveyed in value to the file associated with the file -// descriptor destFd. See the ioctl_ficlonerange(2) man page for details. -func IoctlFileCloneRange(destFd int, value *FileCloneRange) error { - err := ioctl(destFd, FICLONERANGE, uintptr(unsafe.Pointer(value))) - runtime.KeepAlive(value) - return err -} - -// IoctlFileClone performs an FICLONE ioctl operation to clone the entire file -// associated with the file description srcFd to the file associated with the -// file descriptor destFd. 
See the ioctl_ficlone(2) man page for details. -func IoctlFileClone(destFd, srcFd int) error { - return ioctl(destFd, FICLONE, uintptr(srcFd)) -} - -type FileDedupeRange struct { - Src_offset uint64 - Src_length uint64 - Reserved1 uint16 - Reserved2 uint32 - Info []FileDedupeRangeInfo -} - -type FileDedupeRangeInfo struct { - Dest_fd int64 - Dest_offset uint64 - Bytes_deduped uint64 - Status int32 - Reserved uint32 -} - -// IoctlFileDedupeRange performs an FIDEDUPERANGE ioctl operation to share the -// range of data conveyed in value from the file associated with the file -// descriptor srcFd to the value.Info destinations. See the -// ioctl_fideduperange(2) man page for details. -func IoctlFileDedupeRange(srcFd int, value *FileDedupeRange) error { - buf := make([]byte, SizeofRawFileDedupeRange+ - len(value.Info)*SizeofRawFileDedupeRangeInfo) - rawrange := (*RawFileDedupeRange)(unsafe.Pointer(&buf[0])) - rawrange.Src_offset = value.Src_offset - rawrange.Src_length = value.Src_length - rawrange.Dest_count = uint16(len(value.Info)) - rawrange.Reserved1 = value.Reserved1 - rawrange.Reserved2 = value.Reserved2 - - for i := range value.Info { - rawinfo := (*RawFileDedupeRangeInfo)(unsafe.Pointer( - uintptr(unsafe.Pointer(&buf[0])) + uintptr(SizeofRawFileDedupeRange) + - uintptr(i*SizeofRawFileDedupeRangeInfo))) - rawinfo.Dest_fd = value.Info[i].Dest_fd - rawinfo.Dest_offset = value.Info[i].Dest_offset - rawinfo.Bytes_deduped = value.Info[i].Bytes_deduped - rawinfo.Status = value.Info[i].Status - rawinfo.Reserved = value.Info[i].Reserved - } - - err := ioctl(srcFd, FIDEDUPERANGE, uintptr(unsafe.Pointer(&buf[0]))) - - // Output - for i := range value.Info { - rawinfo := (*RawFileDedupeRangeInfo)(unsafe.Pointer( - uintptr(unsafe.Pointer(&buf[0])) + uintptr(SizeofRawFileDedupeRange) + - uintptr(i*SizeofRawFileDedupeRangeInfo))) - value.Info[i].Dest_fd = rawinfo.Dest_fd - value.Info[i].Dest_offset = rawinfo.Dest_offset - value.Info[i].Bytes_deduped = rawinfo.Bytes_deduped - 
value.Info[i].Status = rawinfo.Status - value.Info[i].Reserved = rawinfo.Reserved - } - - return err -} - -// IoctlWatchdogKeepalive issues a keepalive ioctl to a watchdog device. For -// more information, see: -// https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html. -func IoctlWatchdogKeepalive(fd int) error { - return ioctl(fd, WDIOC_KEEPALIVE, 0) -} - -func IoctlHIDGetDesc(fd int, value *HIDRawReportDescriptor) error { - err := ioctl(fd, HIDIOCGRDESC, uintptr(unsafe.Pointer(value))) - runtime.KeepAlive(value) - return err -} - -func IoctlHIDGetRawInfo(fd int) (*HIDRawDevInfo, error) { - var value HIDRawDevInfo - err := ioctl(fd, HIDIOCGRAWINFO, uintptr(unsafe.Pointer(&value))) - return &value, err -} - -func IoctlHIDGetRawName(fd int) (string, error) { - var value [_HIDIOCGRAWNAME_LEN]byte - err := ioctl(fd, _HIDIOCGRAWNAME, uintptr(unsafe.Pointer(&value[0]))) - return ByteSliceToString(value[:]), err -} - -func IoctlHIDGetRawPhys(fd int) (string, error) { - var value [_HIDIOCGRAWPHYS_LEN]byte - err := ioctl(fd, _HIDIOCGRAWPHYS, uintptr(unsafe.Pointer(&value[0]))) - return ByteSliceToString(value[:]), err -} - -func IoctlHIDGetRawUniq(fd int) (string, error) { - var value [_HIDIOCGRAWUNIQ_LEN]byte - err := ioctl(fd, _HIDIOCGRAWUNIQ, uintptr(unsafe.Pointer(&value[0]))) - return ByteSliceToString(value[:]), err -} +// These are defined in ioctl.go and ioctl_linux.go. //sys Linkat(olddirfd int, oldpath string, newdirfd int, newpath string, flags int) (err error) @@ -857,16 +697,19 @@ type SockaddrVM struct { // CID and Port specify a context ID and port address for a VM socket. // Guests have a unique CID, and hosts may have a well-known CID of: // - VMADDR_CID_HYPERVISOR: refers to the hypervisor process. + // - VMADDR_CID_LOCAL: refers to local communication (loopback). // - VMADDR_CID_HOST: refers to other processes on the host. 
- CID uint32 - Port uint32 - raw RawSockaddrVM + CID uint32 + Port uint32 + Flags uint8 + raw RawSockaddrVM } func (sa *SockaddrVM) sockaddr() (unsafe.Pointer, _Socklen, error) { sa.raw.Family = AF_VSOCK sa.raw.Port = sa.Port sa.raw.Cid = sa.CID + sa.raw.Flags = sa.Flags return unsafe.Pointer(&sa.raw), SizeofSockaddrVM, nil } @@ -1171,8 +1014,9 @@ func anyToSockaddr(fd int, rsa *RawSockaddrAny) (Sockaddr, error) { case AF_VSOCK: pp := (*RawSockaddrVM)(unsafe.Pointer(rsa)) sa := &SockaddrVM{ - CID: pp.Cid, - Port: pp.Port, + CID: pp.Cid, + Port: pp.Port, + Flags: pp.Flags, } return sa, nil case AF_BLUETOOTH: diff --git a/vendor/golang.org/x/sys/unix/zerrors_freebsd_arm.go b/vendor/golang.org/x/sys/unix/zerrors_freebsd_arm.go index 0326a6b3af9..3df99f285f1 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/zerrors_freebsd_arm.go @@ -1022,6 +1022,15 @@ const ( MAP_RESERVED0100 = 0x100 MAP_SHARED = 0x1 MAP_STACK = 0x400 + MCAST_BLOCK_SOURCE = 0x54 + MCAST_EXCLUDE = 0x2 + MCAST_INCLUDE = 0x1 + MCAST_JOIN_GROUP = 0x50 + MCAST_JOIN_SOURCE_GROUP = 0x52 + MCAST_LEAVE_GROUP = 0x51 + MCAST_LEAVE_SOURCE_GROUP = 0x53 + MCAST_UNBLOCK_SOURCE = 0x55 + MCAST_UNDEFINED = 0x0 MCL_CURRENT = 0x1 MCL_FUTURE = 0x2 MNT_ACLS = 0x8000000 diff --git a/vendor/golang.org/x/sys/unix/zerrors_solaris_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_solaris_amd64.go index 65fb2c5cd83..1afee6a0890 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_solaris_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_solaris_amd64.go @@ -366,6 +366,7 @@ const ( HUPCL = 0x400 IBSHIFT = 0x10 ICANON = 0x2 + ICMP6_FILTER = 0x1 ICRNL = 0x100 IEXTEN = 0x8000 IFF_ADDRCONF = 0x80000 @@ -612,6 +613,7 @@ const ( IP_RECVPKTINFO = 0x1a IP_RECVRETOPTS = 0x6 IP_RECVSLLA = 0xa + IP_RECVTOS = 0xc IP_RECVTTL = 0xb IP_RETOPTS = 0x8 IP_REUSEADDR = 0x104 @@ -704,6 +706,7 @@ const ( O_APPEND = 0x8 O_CLOEXEC = 0x800000 O_CREAT = 0x100 + O_DIRECT = 0x2000000 O_DIRECTORY = 0x1000000 
O_DSYNC = 0x40 O_EXCL = 0x400 diff --git a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go index 4117ce08a50..4e87b4bebd5 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go @@ -137,6 +137,7 @@ const ( IP_TTL = 3 IP_UNBLOCK_SOURCE = 11 ICANON = 0x0010 + ICMP6_FILTER = 0x26 ICRNL = 0x0002 IEXTEN = 0x0020 IGNBRK = 0x0004 @@ -163,6 +164,12 @@ const ( MAP_PRIVATE = 0x1 // changes are private MAP_SHARED = 0x2 // changes are shared MAP_FIXED = 0x4 // place exactly + MCAST_JOIN_GROUP = 40 + MCAST_LEAVE_GROUP = 41 + MCAST_JOIN_SOURCE_GROUP = 42 + MCAST_LEAVE_SOURCE_GROUP = 43 + MCAST_BLOCK_SOURCE = 44 + MCAST_UNBLOCK_SOURCE = 45 MS_SYNC = 0x1 // msync - synchronous writes MS_ASYNC = 0x2 // asynchronous writes MS_INVALIDATE = 0x4 // invalidate mappings diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index c769e73cd6f..3bfc6f7323b 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -3698,6 +3698,21 @@ const ( ETHTOOL_A_TUNNEL_INFO_MAX = 0x2 ) +type EthtoolDrvinfo struct { + Cmd uint32 + Driver [32]byte + Version [32]byte + Fw_version [32]byte + Bus_info [32]byte + Erom_version [32]byte + Reserved2 [12]byte + N_priv_flags uint32 + N_stats uint32 + Testinfo_len uint32 + Eedump_len uint32 + Regdump_len uint32 +} + type ( HIDRawReportDescriptor struct { Size uint32 diff --git a/vendor/golang.org/x/sys/windows/security_windows.go b/vendor/golang.org/x/sys/windows/security_windows.go index 0e428ecbbde..111c10d3a7f 100644 --- a/vendor/golang.org/x/sys/windows/security_windows.go +++ b/vendor/golang.org/x/sys/windows/security_windows.go @@ -1334,7 +1334,11 @@ func (absoluteSD *SECURITY_DESCRIPTOR) ToSelfRelative() (selfRelativeSD *SECURIT } func (selfRelativeSD *SECURITY_DESCRIPTOR) copySelfRelativeSecurityDescriptor() *SECURITY_DESCRIPTOR { - sdLen 
:= (int)(selfRelativeSD.Length()) + sdLen := int(selfRelativeSD.Length()) + const min = int(unsafe.Sizeof(SECURITY_DESCRIPTOR{})) + if sdLen < min { + sdLen = min + } var src []byte h := (*unsafeheader.Slice)(unsafe.Pointer(&src)) @@ -1342,7 +1346,15 @@ func (selfRelativeSD *SECURITY_DESCRIPTOR) copySelfRelativeSecurityDescriptor() h.Len = sdLen h.Cap = sdLen - dst := make([]byte, sdLen) + const psize = int(unsafe.Sizeof(uintptr(0))) + + var dst []byte + h = (*unsafeheader.Slice)(unsafe.Pointer(&dst)) + alloc := make([]uintptr, (sdLen+psize-1)/psize) + h.Data = (*unsafeheader.Slice)(unsafe.Pointer(&alloc)).Data + h.Len = sdLen + h.Cap = sdLen + copy(dst, src) return (*SECURITY_DESCRIPTOR)(unsafe.Pointer(&dst[0])) } diff --git a/vendor/modules.txt b/vendor/modules.txt index 9873fade591..d204e4f5e68 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -8,7 +8,7 @@ github.com/Azure/go-ansiterm/winterm # github.com/DataDog/zstd v1.4.5 ## explicit github.com/DataDog/zstd -# github.com/IBM/idemix v0.0.0-20210930104432-e4a1410f5353 +# github.com/IBM/idemix v0.0.0-20220112103229-701e7610d405 ## explicit github.com/IBM/idemix github.com/IBM/idemix/bccsp @@ -20,7 +20,7 @@ github.com/IBM/idemix/bccsp/schemes/dlog/crypto/translator/amcl github.com/IBM/idemix/bccsp/schemes/dlog/handlers github.com/IBM/idemix/common/flogging github.com/IBM/idemix/common/flogging/fabenc -# github.com/IBM/mathlib v0.0.0-20210928081244-f5486459a290 +# github.com/IBM/mathlib v0.0.0-20220112091634-0a7378db6912 github.com/IBM/mathlib github.com/IBM/mathlib/driver github.com/IBM/mathlib/driver/amcl @@ -71,14 +71,18 @@ github.com/alecthomas/template/parse github.com/alecthomas/units # github.com/beorn7/perks v1.0.1 github.com/beorn7/perks/quantile -# github.com/cespare/xxhash/v2 v2.1.1 +# github.com/cespare/xxhash/v2 v2.1.1 => github.com/cespare/xxhash/v2 v2.1.2 github.com/cespare/xxhash/v2 -# github.com/consensys/gnark-crypto v0.4.0 +# github.com/consensys/gnark-crypto v0.6.0 
github.com/consensys/gnark-crypto/ecc github.com/consensys/gnark-crypto/ecc/bn254 github.com/consensys/gnark-crypto/ecc/bn254/fp github.com/consensys/gnark-crypto/ecc/bn254/fr +github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc github.com/consensys/gnark-crypto/ecc/bn254/internal/fptower +github.com/consensys/gnark-crypto/field +github.com/consensys/gnark-crypto/field/internal/addchain +github.com/consensys/gnark-crypto/internal/generator/config github.com/consensys/gnark-crypto/internal/parallel # github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59 github.com/containerd/cgroups/stats/v1 @@ -261,6 +265,29 @@ github.com/miekg/pkcs11 # github.com/mitchellh/mapstructure v1.3.2 ## explicit github.com/mitchellh/mapstructure +# github.com/mmcloughlin/addchain v0.4.0 +github.com/mmcloughlin/addchain +github.com/mmcloughlin/addchain/acc +github.com/mmcloughlin/addchain/acc/ast +github.com/mmcloughlin/addchain/acc/ir +github.com/mmcloughlin/addchain/acc/parse +github.com/mmcloughlin/addchain/acc/parse/internal/parser +github.com/mmcloughlin/addchain/acc/pass +github.com/mmcloughlin/addchain/acc/printer +github.com/mmcloughlin/addchain/alg +github.com/mmcloughlin/addchain/alg/contfrac +github.com/mmcloughlin/addchain/alg/dict +github.com/mmcloughlin/addchain/alg/ensemble +github.com/mmcloughlin/addchain/alg/exec +github.com/mmcloughlin/addchain/alg/heuristic +github.com/mmcloughlin/addchain/alg/opt +github.com/mmcloughlin/addchain/internal/bigint +github.com/mmcloughlin/addchain/internal/bigints +github.com/mmcloughlin/addchain/internal/bigvector +github.com/mmcloughlin/addchain/internal/container/heap +github.com/mmcloughlin/addchain/internal/errutil +github.com/mmcloughlin/addchain/internal/print +github.com/mmcloughlin/addchain/meta # github.com/moby/sys/mount v0.2.0 github.com/moby/sys/mount # github.com/moby/sys/mountinfo v0.4.0 @@ -456,7 +483,7 @@ golang.org/x/net/internal/timeseries golang.org/x/net/trace # golang.org/x/sync 
v0.0.0-20190911185100-cd5d95a43a6e golang.org/x/sync/errgroup -# golang.org/x/sys v0.0.0-20210326220804-49726bf1d181 +# golang.org/x/sys v0.0.0-20210420205809-ac73e9fd8988 golang.org/x/sys/cpu golang.org/x/sys/internal/unsafeheader golang.org/x/sys/unix @@ -557,3 +584,4 @@ gopkg.in/yaml.v2 ## explicit gopkg.in/yaml.v3 # github.com/onsi/gomega => github.com/onsi/gomega v1.9.0 +# github.com/cespare/xxhash/v2 => github.com/cespare/xxhash/v2 v2.1.2