commit 12195c3ddcde3ed23d1d79924d05e3a5e9d4ae99 Author: Armin Friedl Date: Wed Apr 4 15:18:34 2018 +0200 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..de0ba63 --- /dev/null +++ b/.gitignore @@ -0,0 +1,411 @@ +*.blf + +# copied from brainflyer gitignore + +# Object files +*.o +*.ko +*.obj +*.elf + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +brainflayer +hexln +hex2blf +blfchk +ecmtabgen +filehex + +# Debug files +*.dSYM/ + +# Tags +GPATH +GRTAGS +GTAGS + +# misc +old +notes + +# Created by https://www.gitignore.io/api/vim,java,linux,emacs,python,eclipse,windows,intellij+all + +### Eclipse ### + +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.settings/ +.loadpath +.recommenders + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# PyDev specific (Python IDE for Eclipse) +*.pydevproject + +# CDT-specific (C/C++ Development Tooling) +.cproject + +# Java annotation processor (APT) +.factorypath + +# PDT-specific (PHP Development Tools) +.buildpath + +# sbteclipse plugin +.target + +# Tern plugin +.tern-project + +# TeXlipse plugin +.texlipse + +# STS (Spring Tool Suite) +.springBeans + +# Code Recommenders +.recommenders/ + +# Scala IDE specific (Scala & Java development for Eclipse) +.cache-main +.scala_dependencies +.worksheet + +### Eclipse Patch ### +# Eclipse Core +.project + +# JDT-specific (Eclipse Java Development Tools) +.classpath + +### Emacs ### +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile +projectile-bookmarks.eld + +# directory configuration +.dir-locals.el + +# saveplace +places + +# url cache +url/cache/ + +# cedet +ede-projects.el + +# smex +smex-items + +# company-statistics +company-statistics-cache.el + +# anaconda-mode +anaconda-mode/ + +### Intellij+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/dictionaries + +# Sensitive or high-churn files: +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.xml +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml + +# Gradle: +.idea/**/gradle.xml +.idea/**/libraries + +# CMake +cmake-build-debug/ + +# Mongo Explorer plugin: +.idea/**/mongoSettings.xml + +## File-based project format: +*.iws + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Ruby plugin and RubyMine +/.rakeTasks + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +### Intellij+all Patch ### +# Ignores the whole idea 
folder +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + +.idea/ + +### Java ### +# Compiled class file +*.class + +# Log file +*.log + +# BlueJ files +*.ctxt + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.ear +*.zip +*.tar.gz +*.rar + +# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* + +### Linux ### + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +.pytest_cache/ +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule.* + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +### Vim ### +# swap +.sw[a-p] +.*.sw[a-p] +# session +Session.vim +# temporary +.netrwhist +# auto-generated tag files +tags + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +ehthumbs.db +ehthumbs_vista.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msm +*.msp + +# Windows shortcuts +*.lnk + + +# End of https://www.gitignore.io/api/vim,java,linux,emacs,python,eclipse,windows,intellij+all diff --git a/arch.md b/arch.md new file mode 100644 index 0000000..dfc85fd --- /dev/null +++ b/arch.md @@ -0,0 +1,48 @@ +# Architecture # + +## Partitioning ## + +# Protocol # + +| t | Client | Server | Comment | +|---|--------|--------|---------------| +| 2 | | | Send command | + +where ` = WRK | RES | REP` +(Work | Result | Report) + +## WRK ## + +Ask for work package + +| t | Client | Server | Comment | +|---|--------|----------|--------------------------------------------------| +| 2 | WRK | | Send work command | +| 3 | | | Work package: Start key, end key, number of keys | +| 4 | ACK | | Ack work | + +where ` = ` + ` a 160 bit hex number` + ` a 160 bit hex number strictly greater than ` + ` a 32 bit unsigned integer equalling - in base 10` + +## RES ## +MUST follow proper WRK + +Respond with result for work block. 
+ +| t | Client | Server | Comment | +|---|--------------|--------|-----------------------------------------------------| +| 2 | RES | | Send result command with corresponding work package | +| 3 | | ACK | Ack receiving | +| 4 | | | Result type | +| 5 | | ACK | Ack receiving | + +where ` = NIL | ACK` + +if SUC: +| 6 | | | Private key hex if found | +| 7 | | ACK | Ack receiving | + +## REP ## +TODO diff --git a/bf/Makefile b/bf/Makefile new file mode 100644 index 0000000..51a7d01 --- /dev/null +++ b/bf/Makefile @@ -0,0 +1,71 @@ +HEADERS = bloom.h crack.h hash160.h warpwallet.h +OBJ_MAIN = brainflayer.o hex2blf.o blfchk.o ecmtabgen.o hexln.o filehex.o +OBJ_UTIL = hex.o bloom.o mmapf.o hsearchf.o ec_pubkey_fast.o ripemd160_256.o dldummy.o +OBJ_ALGO = $(patsubst %.c,%.o,$(wildcard algo/*.c)) +OBJECTS = $(OBJ_MAIN) $(OBJ_UTIL) $(OBJ_ALGO) +BINARIES = brainflayer hexln hex2blf blfchk ecmtabgen filehex +LIBS = -lssl -lrt -lcrypto -lz -lgmp +CFLAGS = -O3 \ + -flto -funsigned-char -falign-functions=16 -falign-loops=16 -falign-jumps=16 \ + -Wall -Wextra -Wno-pointer-sign -Wno-sign-compare \ + -pedantic -std=gnu11 +COMPILE = gcc $(CFLAGS) + +all: $(BINARIES) + +.git: + @echo 'This does not look like a cloned git repo. Unable to fetch submodules.' + @false + +secp256k1/.libs/libsecp256k1.a: .git + git submodule init + git submodule update + cd secp256k1; make distclean || true + cd secp256k1; ./autogen.sh + cd secp256k1; ./configure + cd secp256k1; make + +secp256k1/include/secp256k1.h: secp256k1/.libs/libsecp256k1.a + +scrypt-jane/scrypt-jane.h: .git + git submodule init + git submodule update + +scrypt-jane/scrypt-jane.o: scrypt-jane/scrypt-jane.h scrypt-jane/scrypt-jane.c + cd scrypt-jane; gcc -O3 -DSCRYPT_SALSA -DSCRYPT_SHA256 -c scrypt-jane.c -o scrypt-jane.o + +brainflayer.o: brainflayer.c secp256k1/include/secp256k1.h + +algo/warpwallet.o: algo/warpwallet.c scrypt-jane/scrypt-jane.h + +algo/brainwalletio.o: algo/brainwalletio.c scrypt-jane/scrypt-jane.h + +algo/brainv2.o: algo/brainv2.c scrypt-jane/scrypt-jane.h + +ec_pubkey_fast.o: ec_pubkey_fast.c secp256k1/include/secp256k1.h + $(COMPILE) -Wno-unused-function -c $< -o $@ + +%.o: %.c + $(COMPILE) -c $< -o $@ + +hexln: hexln.o hex.o + $(COMPILE) $^ $(LIBS) -o $@ + +blfchk: blfchk.o hex.o bloom.o mmapf.o hsearchf.o + $(COMPILE) $^ $(LIBS) -o $@ + +hex2blf: hex2blf.o hex.o bloom.o mmapf.o + $(COMPILE) $^ $(LIBS) -lm -o $@ + +ecmtabgen: ecmtabgen.o mmapf.o ec_pubkey_fast.o + $(COMPILE) $^ $(LIBS) -o $@ + +filehex: filehex.o hex.o + $(COMPILE) $^ $(LIBS) -o $@ + +brainflayer: brainflayer.o $(OBJ_UTIL) $(OBJ_ALGO) \ + secp256k1/.libs/libsecp256k1.a scrypt-jane/scrypt-jane.o + $(COMPILE) $^ $(LIBS) -o $@ + +clean: + rm -f $(BINARIES) $(OBJECTS) diff --git a/bf/README.md b/bf/README.md new file mode 100644 index 0000000..fb88464 --- /dev/null +++ b/bf/README.md @@ -0,0 +1,149 @@ +Brainflayer +=========== + +Brainflayer is a Proof-of-Concept brainwallet cracking tool that uses +[libsecp256k1](https://github.com/bitcoin/secp256k1) for pubkey generation. +It was originally released as part of my DEFCON talk about cracking brainwallets +([slides](https://rya.nc/dc23), [video](https://rya.nc/b6), [why](https://rya.nc/defcon-brainwallets.html)). + +The name is a reference to [Mind Flayers](https://en.wikipedia.org/wiki/Illithid), +a race of monsters from the Dungeons & Dragons role-playing game. They eat +brains, psionically enslave people and look like lovecraftian horrors. 
+ +The current release is more than four times faster than the DEFCON release, and +many features have been added. + +If brainflayer is useful to you, please get in touch to let me know. I'm very +interested in any research it's being used for, and I'm generally happy to +collaborate with academic groups. + +Disclaimer +---------- +Just because you *can* steal someone's money doesn't mean you *should*. +Stealing would make you a jerk. Don't be a jerk. + +No support will be provided at this time, and I may ignore or close issues +requesting support without responding. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +Usage +----- + +### Basic + +Precompute the bloom filter: + +`hex2blf example.hex example.blf` + +Run Brainflayer against it: + +`brainflayer -v -b example.blf -i phraselist.txt` + +or + +`your_generator | brainflayer -v -b example.blf` + +### Advanced + +Brainflayer's design is heavily influenced by [Unix philosophy](https://en.wikipedia.org/wiki/Unix_philosophy). +It (mostly) does one thing: hunt for tasty brainwallets. A major feature it +does *not* have is generating candidate passwords/passphrases. There are plenty +of other great tools that do that, and brainflayer is happy to have you pipe +their output to it. + +Unfortunately, brainflayer is not currently multithreaded. If you want to have +it keep multiple cores busy, you'll have to come up with a way to distribute +the work yourself (brainflayer's -n and -k options may help). In my testing, +brainflayer benefits significantly from hyperthreading, so you may want to +run two copies per physical core. Also worth noting is that brainflayer mmaps +its data files in shared memory, so additional brainflayer processes do not +use up that much additional RAM. + +While not strictly required, it is *highly* recommended to use the following +options: + +* `-m FILE` Load the ecmult table from `FILE` (generated with `ecmtabgen`) + rather than computing it on startup. This will allow multiple + brainflayer processes to share the same table in memory, and + signifigantly reduce startup time when using a large table. + +* `-f FILE` Verify check bloom filter matches against `FILE`, a list of all + hash160s generated with + `sort -u example.hex | xxd -r -p > example.bin` + Enough addresses exist on the Bitcoin network to cause false + positives in the bloom filter, this option will suppress them. + +Brainflayer supports a few other types of input via the `-t` option: + +* `-t keccak` passphrases to be hashed with keccak256 (some ethereum tools) + +* `-t priv` raw private keys - this can be used to support arbitrary + deterministic wallet schemes via an external program. Any trailing + data after the hex encoded private key will be included in + brainflayer's output as well, for reference. 
See also the `-I` + option if you want to crack a bunch of sequential keys, which has + special speed optimizations. + +* `-t warp` salts or passwords/passphrases for WarpWallet + +* `-t bwio` salts or passwords/passphrases for brainwallet.io + +* `-t bv2` salts or passwords/passphrases for brainv2 - this one is *very* slow + on CPU, however the parameter choices make it a great target for GPUs + and FPGAs. + +* `-t rush` passwords for password-protected rushwallets - pass the fragment (the + part of the url after the #) using `-r`. Almost all wrong passwords + will be rejected even without a bloom filter. + +Address types can be specified with the `-c` option: + +* `-c u` uncompressed addresses + +* `-c c` compressed addresses + +* `-c e` ethereum addresses + +* `-c x` most signifigant bits of public point's x coordinate + +It's possible to combine two or more of these, e.g. the default is `-c uc`. + +An incremental private key brute force mode is available for fans of +[directory.io](http://www.directory.io/), try + +`brainflayer -v -I 0000000000000000000000000000000000000000000000000000000000000001 -b example.blf` + +See the output of `brainflayer -h` for more detailed usage info. + +Also included is `blfchk` - you can pipe it hex encoded hash160 to check a +bloom filter file for. It's very fast - it can easily check millions of +hash160s per second. Not entirely sure what this is good for but I'm sure +you'll come up with something. + +Building +-------- + +Should compile on Linux with `make` provided you have the required devel libs +installed (at least openssl and gpm are required along with libsecp256k1's +build dependencies). I really need to learn autotools. If you file an issue +about a build failure in libsecp256k1 I will close it. + +Authors +------- + +The bulk of Brainflayer was written by Ryan Castellucci. Nicolas Courtois and +Guangyan Song contributed the code in `ec_pubkey_fast.c` which more than +doubles the speed of public key computations compared with the stock secp256k1 +library from Bitcoin. This code uses a much larger table for ec multiplication +and optimized routines for ec addition and doubling. 
diff --git a/bf/algo/brainv2.c b/bf/algo/brainv2.c new file mode 100644 index 0000000..6e73f50 --- /dev/null +++ b/bf/algo/brainv2.c @@ -0,0 +1,60 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +// crypto.h used for the version +#include + +#include "../scrypt-jane/scrypt-jane.h" + +#include "../hex.h" + +#include "brainv2.h" + +#define SALT_BITS_PER_THREAD 256 +#define SALT_BYTES_PER_THREAD (SALT_BITS_PER_THREAD / 8) +#define THREADS 256 +#define KEY_SIZE 128 + +#define first_scrypt(p, pl, s, ss, k, ks) \ + scrypt(p, pl, s, ss, 13, 0, 6, k, ks) +#define middle_scrypt(p, pl, s, ss, k, ks) \ + scrypt(p, pl, s, ss, 15, 0, 6, k, ks) +#define last_scrypt(p, pl, s, ss, k, ks) \ + scrypt(p, pl, s, ss, 13, 0, 6, k, ks) + +int brainv2(unsigned char *pass, size_t pass_sz, + unsigned char *salt, size_t salt_sz, + unsigned char *out) { + unsigned char key1[THREADS*SALT_BYTES_PER_THREAD*2]; + unsigned char key2[THREADS*SALT_BYTES_PER_THREAD]; + unsigned char key3[16]; + + int key1_sz = sizeof(key1); + int key2_sz = sizeof(key2); + int key3_sz = sizeof(key3); + + int t; + + first_scrypt(pass, pass_sz, salt, salt_sz, key1, key1_sz); + for (t = 0; t < THREADS; ++t) { + middle_scrypt(key1+((t*2+0)*SALT_BYTES_PER_THREAD), SALT_BYTES_PER_THREAD, + key1+((t*2+1)*SALT_BYTES_PER_THREAD), SALT_BYTES_PER_THREAD, + key2+(t*SALT_BYTES_PER_THREAD), SALT_BYTES_PER_THREAD); + } + last_scrypt(pass, pass_sz, key2, key2_sz, key3, key3_sz); + + hex(key3, key3_sz, out, 33); + + return 0; +} diff --git a/bf/algo/brainv2.h b/bf/algo/brainv2.h new file mode 100644 index 0000000..90c00c9 --- /dev/null +++ b/bf/algo/brainv2.h @@ -0,0 +1,8 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#ifndef __BRAINFLAYER_BRAINV2_H_ +#define __BRAINFLAYER_BRAINV2_H_ + +int brainv2(unsigned char *, size_t, unsigned char *, size_t, unsigned char *); + +/* vim: set ts=2 sw=2 et ai si: */ +#endif /* __BRAINFLAYER_BRAINV2_H_ */ diff --git a/bf/algo/brainwalletio.c b/bf/algo/brainwalletio.c new file mode 100644 index 0000000..f92f369 --- /dev/null +++ b/bf/algo/brainwalletio.c @@ -0,0 +1,45 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +// crypto.h used for the version +#include + +#include "../scrypt-jane/scrypt-jane.h" + +#include "../hex.h" + +#define _SCRYPT_N (1<<18) +#define _SCRYPT_r 8 +#define _SCRYPT_p 1 + +#define jane_scrypt(p, pl, s, ss, k, ks) \ + scrypt(p, pl, s, ss, 17, 3, 0, k, ks) + +static SHA256_CTX sha256_ctx; + +int brainwalletio(unsigned char *pass, size_t pass_sz, + unsigned char *salt, size_t salt_sz, + unsigned char *out) { + unsigned char seed1[32], seed2[65]; + + int seed1_sz = sizeof(seed1), seed2_sz = (sizeof(seed2) - 1); + + jane_scrypt(pass, pass_sz, salt, salt_sz, seed1, seed1_sz); + hex(seed1, seed1_sz, seed2, seed2_sz); + SHA256_Init(&sha256_ctx); + SHA256_Update(&sha256_ctx, seed2, seed2_sz); + SHA256_Final(out, &sha256_ctx); + + return 0; +} diff --git a/bf/algo/brainwalletio.h b/bf/algo/brainwalletio.h new file mode 100644 index 0000000..4cebf2a --- /dev/null +++ b/bf/algo/brainwalletio.h @@ -0,0 +1,8 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#ifndef __BRAINFLAYER_BRAINWALLETIO_H_ +#define __BRAINFLAYER_BRAINWALLETIO_H_ + +int brainwalletio(unsigned char *, size_t, unsigned char *, 
size_t, unsigned char *); + +/* vim: set ts=2 sw=2 et ai si: */ +#endif /* __BRAINFLAYER_BRAINWALLETIO_H_ */ diff --git a/bf/algo/keccak.c b/bf/algo/keccak.c new file mode 100644 index 0000000..90972c9 --- /dev/null +++ b/bf/algo/keccak.c @@ -0,0 +1,182 @@ +/*- + * Copyright (c) 2015 Taylor R. Campbell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define _POSIX_C_SOURCE 200809L + +#include + +#include "keccak.h" + +#define secret /* can't use in variable-time operations, should zero */ + +#define FOR5(X, STMT) do \ +{ \ + (X) = 0; (STMT); \ + (X) = 1; (STMT); \ + (X) = 2; (STMT); \ + (X) = 3; (STMT); \ + (X) = 4; (STMT); \ +} while (0) + +static inline secret uint64_t +rol64(secret uint64_t v, unsigned c) +{ + + return ((v << c) | (v >> (64 - c))); +} + +static inline void +keccakf1600_theta(secret uint64_t A[25]) +{ + secret uint64_t C0, C1, C2, C3, C4; + unsigned y; + + C0 = C1 = C2 = C3 = C4 = 0; +#pragma GCC diagnostic ignored "-pedantic" + FOR5(y, { + C0 ^= A[0 + 5*y]; + C1 ^= A[1 + 5*y]; + C2 ^= A[2 + 5*y]; + C3 ^= A[3 + 5*y]; + C4 ^= A[4 + 5*y]; + }); + FOR5(y, { + A[0 + 5*y] ^= C4 ^ rol64(C1, 1); + A[1 + 5*y] ^= C0 ^ rol64(C2, 1); + A[2 + 5*y] ^= C1 ^ rol64(C3, 1); + A[3 + 5*y] ^= C2 ^ rol64(C4, 1); + A[4 + 5*y] ^= C3 ^ rol64(C0, 1); + }); +#pragma GCC diagnostic pop +} + +static inline void +keccakf1600_rho_pi(secret uint64_t A[25]) +{ + secret uint64_t T, U; + + /* + * Permute by (x,y) |---> (y, 2x + 3y mod 5) starting at (1,0), + * rotate the ith element by (i + 1)(i + 2)/2 mod 64. 
+ */ + U = A[ 1]; T = U; + U = A[10]; A[10] = rol64(T, 1); T = U; + U = A[ 7]; A[ 7] = rol64(T, 3); T = U; + U = A[11]; A[11] = rol64(T, 6); T = U; + U = A[17]; A[17] = rol64(T, 10); T = U; + U = A[18]; A[18] = rol64(T, 15); T = U; + U = A[ 3]; A[ 3] = rol64(T, 21); T = U; + U = A[ 5]; A[ 5] = rol64(T, 28); T = U; + U = A[16]; A[16] = rol64(T, 36); T = U; + U = A[ 8]; A[ 8] = rol64(T, 45); T = U; + U = A[21]; A[21] = rol64(T, 55); T = U; + U = A[24]; A[24] = rol64(T, 2); T = U; + U = A[ 4]; A[ 4] = rol64(T, 14); T = U; + U = A[15]; A[15] = rol64(T, 27); T = U; + U = A[23]; A[23] = rol64(T, 41); T = U; + U = A[19]; A[19] = rol64(T, 56); T = U; + U = A[13]; A[13] = rol64(T, 8); T = U; + U = A[12]; A[12] = rol64(T, 25); T = U; + U = A[ 2]; A[ 2] = rol64(T, 43); T = U; + U = A[20]; A[20] = rol64(T, 62); T = U; + U = A[14]; A[14] = rol64(T, 18); T = U; + U = A[22]; A[22] = rol64(T, 39); T = U; + U = A[ 9]; A[ 9] = rol64(T, 61); T = U; + U = A[ 6]; A[ 6] = rol64(T, 20); T = U; + A[ 1] = rol64(T, 44); +} + +static inline void +keccakf1600_chi(secret uint64_t A[25]) +{ + secret uint64_t B0, B1, B2, B3, B4; + unsigned y; + +#pragma GCC diagnostic ignored "-pedantic" + FOR5(y, { + B0 = A[0 + 5*y]; + B1 = A[1 + 5*y]; + B2 = A[2 + 5*y]; + B3 = A[3 + 5*y]; + B4 = A[4 + 5*y]; + A[0 + 5*y] ^= ~B1 & B2; + A[1 + 5*y] ^= ~B2 & B3; + A[2 + 5*y] ^= ~B3 & B4; + A[3 + 5*y] ^= ~B4 & B0; + A[4 + 5*y] ^= ~B0 & B1; + }); +#pragma GCC diagnostic pop +} + +static void +keccakf1600_round(secret uint64_t A[25]) +{ + + keccakf1600_theta(A); + keccakf1600_rho_pi(A); + keccakf1600_chi(A); +} + +void +keccakf1600(secret uint64_t A[25]) +{ + /* + * RC[i] = \sum_{j = 0,...,6} rc(j + 7i) 2^(2^j - 1), + * rc(t) = (x^t mod x^8 + x^6 + x^5 + x^4 + 1) mod x in GF(2)[x] + */ + static const uint64_t RC[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL, + }; + unsigned i; + + for (i = 0; i < 24; i++) { + keccakf1600_round(A); + A[0] ^= RC[i]; + } +} diff --git a/bf/algo/keccak.h b/bf/algo/keccak.h new file mode 100644 index 0000000..51a8e02 --- /dev/null +++ b/bf/algo/keccak.h @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2015 Taylor R. Campbell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef KECCAK_H +#define KECCAK_H + +#include + +void keccakf1600(uint64_t A[25]); + +#endif /* KECCAK_H */ diff --git a/bf/algo/sha3.c b/bf/algo/sha3.c new file mode 100644 index 0000000..edee7a5 --- /dev/null +++ b/bf/algo/sha3.c @@ -0,0 +1,674 @@ +/*- + * Copyright (c) 2015 Taylor R. Campbell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * SHA-3: FIPS-202, Permutation-Based Hash and Extendable-Ouptut Functions + */ + +#define _POSIX_C_SOURCE 200809L + +#include +#include +#include +#include + +#include "keccak.h" + +#include "sha3.h" + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + +void *(*volatile sha3_explicit_memset_impl)(void *, int, size_t) = &memset; +static void * +explicit_memset(void *buf, int c, size_t n) +{ + + return (*sha3_explicit_memset_impl)(buf, c, n); +} + +static inline uint64_t +le64dec(const void *buf) +{ + const uint8_t *p = buf; + + return (((uint64_t)p[0]) | + ((uint64_t)p[1] << 8) | + ((uint64_t)p[2] << 16) | + ((uint64_t)p[3] << 24) | + ((uint64_t)p[4] << 32) | + ((uint64_t)p[5] << 40) | + ((uint64_t)p[6] << 48) | + ((uint64_t)p[7] << 56)); +} + +static inline void +le64enc(void *buf, uint64_t v) +{ + uint8_t *p = buf; + + *p++ = v; v >>= 8; + *p++ = v; v >>= 8; + *p++ = v; v >>= 8; + *p++ = v; v >>= 8; + *p++ = v; v >>= 8; + *p++ = v; v >>= 8; + *p++ = v; v >>= 8; + *p++ = v; +} + +/* + * Common body. All the SHA-3 functions share code structure. They + * differ only in the size of the chunks they split the message into: + * for digest size d, they are split into chunks of 200 - d bytes. 
+ */ + +static inline unsigned +sha3_rate(unsigned d) +{ + const unsigned cw = 2*d/8; /* capacity in words */ + + return 25 - cw; +} + +static void +sha3_init(struct sha3 *C, unsigned rw) +{ + unsigned iw; + + C->nb = 8*rw; + for (iw = 0; iw < 25; iw++) + C->A[iw] = 0; +} + +static void +sha3_update(struct sha3 *C, const uint8_t *data, size_t len, unsigned rw) +{ + uint64_t T; + unsigned ib, iw; /* index of byte/word */ + + assert(0 < C->nb); + + /* If there's a partial word, try to fill it. */ + if ((C->nb % 8) != 0) { + T = 0; + for (ib = 0; ib < MIN(len, C->nb % 8); ib++) + T |= (uint64_t)data[ib] << (8*ib); + C->A[rw - (C->nb + 7)/8] ^= T << (8*(8 - (C->nb % 8))); + C->nb -= ib; + data += ib; + len -= ib; + + /* If we filled the buffer, permute now. */ + if (C->nb == 0) { + keccakf1600(C->A); + C->nb = 8*rw; + } + + /* If that exhausted the input, we're done. */ + if (len == 0) + return; + } + + /* At a word boundary. Fill any partial buffer. */ + assert((C->nb % 8) == 0); + if (C->nb < 8*rw) { + for (iw = 0; iw < MIN(len, C->nb)/8; iw++) + C->A[rw - C->nb/8 + iw] ^= le64dec(data + 8*iw); + C->nb -= 8*iw; + data += 8*iw; + len -= 8*iw; + + /* If we filled the buffer, permute now. */ + if (C->nb == 0) { + keccakf1600(C->A); + C->nb = 8*rw; + } else { + /* Otherwise, less than a word left. */ + assert(len < 8); + goto partial; + } + } + + /* At a buffer boundary. Absorb input one buffer at a time. */ + assert(C->nb == 8*rw); + while (8*rw <= len) { + for (iw = 0; iw < rw; iw++) + C->A[iw] ^= le64dec(data + 8*iw); + keccakf1600(C->A); + data += 8*rw; + len -= 8*rw; + } + + /* Partially fill the buffer with as many words as we can. */ + for (iw = 0; iw < len/8; iw++) + C->A[rw - C->nb/8 + iw] ^= le64dec(data + 8*iw); + C->nb -= 8*iw; + data += 8*iw; + len -= 8*iw; + +partial: + /* Partially fill the last word with as many bytes as we can. */ + assert(len < 8); + assert(0 < C->nb); + assert((C->nb % 8) == 0); + T = 0; + for (ib = 0; ib < len; ib++) + T |= (uint64_t)data[ib] << (8*ib); + C->A[rw - C->nb/8] ^= T; + C->nb -= ib; + assert(0 < C->nb); +} + +static inline void +sha3_or_keccak_final(uint8_t *h, unsigned d, struct sha3 *C, unsigned rw, uint64_t padding) +{ + unsigned nw, iw; + + assert(d <= 8*25); + assert(0 < C->nb); + + /* Append 01, pad with 10*1 up to buffer boundary, LSB first. */ + nw = (C->nb + 7)/8; + assert(0 < nw); + assert(nw <= rw); + C->A[rw - nw] ^= padding << (8*(8*nw - C->nb)); + C->A[rw - 1] ^= 0x8000000000000000ULL; + + /* Permute one last time. */ + keccakf1600(C->A); + + /* Reveal the first 8d bits of state, forget 1600-8d of them. */ + for (iw = 0; iw < d/8; iw++) + le64enc(h + 8*iw, C->A[iw]); + h += 8*iw; + d -= 8*iw; + if (0 < d) { + /* For SHA3-224, we need to expose a partial word. */ + uint64_t T = C->A[iw]; + do { + *h++ = T & 0xff; + T >>= 8; + } while (--d); + } + (void)explicit_memset(C->A, 0, sizeof C->A); + C->nb = 0; +} + +static void +sha3_final(uint8_t *h, unsigned d, struct sha3 *C, unsigned rw) +{ + sha3_or_keccak_final(h, d, C, rw, 0x06); +} + +static void +keccak_final(uint8_t *h, unsigned d, struct sha3 *C, unsigned rw) +{ + sha3_or_keccak_final(h, d, C, rw, 0x01); +} + +static void +shake_final(uint8_t *h, unsigned d, struct sha3 *C, unsigned rw) +{ + unsigned nw, iw; + + assert(0 < C->nb); + + /* Append 1111, pad with 10*1 up to buffer boundary, LSB first. 
*/ + nw = (C->nb + 7)/8; + assert(0 < nw); + assert(nw <= rw); + C->A[rw - nw] ^= (uint64_t)0x1f << (8*(8*nw - C->nb)); + C->A[rw - 1] ^= 0x8000000000000000ULL; + + /* Permute, reveal first rw words of state, repeat. */ + while (8*rw <= d) { + keccakf1600(C->A); + for (iw = 0; iw < rw; iw++) + le64enc(h + 8*iw, C->A[iw]); + h += 8*iw; + d -= 8*iw; + } + + /* + * If 8*rw (the output rate in bytes) does not divide d, more + * words are wanted: permute again and reveal a little more. + */ + if (0 < d) { + keccakf1600(C->A); + for (iw = 0; iw < d/8; iw++) + le64enc(h + 8*iw, C->A[iw]); + h += 8*iw; + d -= 8*iw; + + /* + * If 8 does not divide d, more bytes are wanted: + * reveal them. + */ + if (0 < d) { + uint64_t T = C->A[iw]; + do { + *h++ = T & 0xff; + T >>= 8; + } while (--d); + } + } + + (void)explicit_memset(C->A, 0, sizeof C->A); + C->nb = 0; +} + +void +SHA3_224_Init(SHA3_224_CTX *C) +{ + + sha3_init(&C->C224, sha3_rate(SHA3_224_DIGEST_LENGTH)); +} + +void +SHA3_224_Update(SHA3_224_CTX *C, const uint8_t *data, size_t len) +{ + + sha3_update(&C->C224, data, len, sha3_rate(SHA3_224_DIGEST_LENGTH)); +} + +void +SHA3_224_Final(uint8_t h[SHA3_224_DIGEST_LENGTH], SHA3_224_CTX *C) +{ + + sha3_final(h, SHA3_224_DIGEST_LENGTH, &C->C224, + sha3_rate(SHA3_224_DIGEST_LENGTH)); +} + +void +SHA3_256_Init(SHA3_256_CTX *C) +{ + + sha3_init(&C->C256, sha3_rate(SHA3_256_DIGEST_LENGTH)); +} + +void +SHA3_256_Update(SHA3_256_CTX *C, const uint8_t *data, size_t len) +{ + + sha3_update(&C->C256, data, len, sha3_rate(SHA3_256_DIGEST_LENGTH)); +} + +void +SHA3_256_Final(uint8_t h[SHA3_256_DIGEST_LENGTH], SHA3_256_CTX *C) +{ + + sha3_final(h, SHA3_256_DIGEST_LENGTH, &C->C256, + sha3_rate(SHA3_256_DIGEST_LENGTH)); +} + +void +SHA3_384_Init(SHA3_384_CTX *C) +{ + + sha3_init(&C->C384, sha3_rate(SHA3_384_DIGEST_LENGTH)); +} + +void +SHA3_384_Update(SHA3_384_CTX *C, const uint8_t *data, size_t len) +{ + + sha3_update(&C->C384, data, len, sha3_rate(SHA3_384_DIGEST_LENGTH)); +} + +void +SHA3_384_Final(uint8_t h[SHA3_384_DIGEST_LENGTH], SHA3_384_CTX *C) +{ + + sha3_final(h, SHA3_384_DIGEST_LENGTH, &C->C384, + sha3_rate(SHA3_384_DIGEST_LENGTH)); +} + +void +SHA3_512_Init(SHA3_512_CTX *C) +{ + + sha3_init(&C->C512, sha3_rate(SHA3_512_DIGEST_LENGTH)); +} + +void +SHA3_512_Update(SHA3_512_CTX *C, const uint8_t *data, size_t len) +{ + + sha3_update(&C->C512, data, len, sha3_rate(SHA3_512_DIGEST_LENGTH)); +} + +void +SHA3_512_Final(uint8_t h[SHA3_512_DIGEST_LENGTH], SHA3_512_CTX *C) +{ + + sha3_final(h, SHA3_512_DIGEST_LENGTH, &C->C512, + sha3_rate(SHA3_512_DIGEST_LENGTH)); +} + +void +SHAKE128_Init(SHAKE128_CTX *C) +{ + + sha3_init(&C->C128, sha3_rate(128/8)); +} + +void +SHAKE128_Update(SHAKE128_CTX *C, const uint8_t *data, size_t len) +{ + + sha3_update(&C->C128, data, len, sha3_rate(128/8)); +} + +void +SHAKE128_Final(uint8_t *h, size_t d, SHAKE128_CTX *C) +{ + + shake_final(h, d, &C->C128, sha3_rate(128/8)); +} + +void +SHAKE256_Init(SHAKE256_CTX *C) +{ + + sha3_init(&C->C256, sha3_rate(256/8)); +} + +void +SHAKE256_Update(SHAKE256_CTX *C, const uint8_t *data, size_t len) +{ + + sha3_update(&C->C256, data, len, sha3_rate(256/8)); +} + +void +SHAKE256_Final(uint8_t *h, size_t d, SHAKE256_CTX *C) +{ + + shake_final(h, d, &C->C256, sha3_rate(256/8)); +} + +void +KECCAK_256_Final(uint8_t h[SHA3_256_DIGEST_LENGTH], SHA3_256_CTX *C) +{ + + keccak_final(h, SHA3_256_DIGEST_LENGTH, &C->C256, + sha3_rate(SHA3_256_DIGEST_LENGTH)); +} + +void +KECCAK_384_Final(uint8_t h[SHA3_384_DIGEST_LENGTH], SHA3_384_CTX *C) +{ + + 
keccak_final(h, SHA3_384_DIGEST_LENGTH, &C->C384, + sha3_rate(SHA3_384_DIGEST_LENGTH)); +} + +void +KECCAK_512_Final(uint8_t h[SHA3_512_DIGEST_LENGTH], SHA3_512_CTX *C) +{ + + keccak_final(h, SHA3_512_DIGEST_LENGTH, &C->C512, + sha3_rate(SHA3_512_DIGEST_LENGTH)); +} + +static void +sha3_selftest_prng(void *buf, size_t len, uint32_t seed) +{ + uint8_t *p = buf; + size_t n = len; + uint32_t t, a, b; + + a = 0xdead4bad * seed; + b = 1; + + while (n--) { + t = a + b; + *p++ = t >> 24; + a = b; + b = t; + } +} + +int +SHA3_Selftest(void) +{ + const uint8_t d224_0[] = { /* SHA3-224(0-bit) */ + 0x6b,0x4e,0x03,0x42,0x36,0x67,0xdb,0xb7, + 0x3b,0x6e,0x15,0x45,0x4f,0x0e,0xb1,0xab, + 0xd4,0x59,0x7f,0x9a,0x1b,0x07,0x8e,0x3f, + 0x5b,0x5a,0x6b,0xc7, + }; + const uint8_t d256_0[] = { /* SHA3-256(0-bit) */ + 0xa7,0xff,0xc6,0xf8,0xbf,0x1e,0xd7,0x66, + 0x51,0xc1,0x47,0x56,0xa0,0x61,0xd6,0x62, + 0xf5,0x80,0xff,0x4d,0xe4,0x3b,0x49,0xfa, + 0x82,0xd8,0x0a,0x4b,0x80,0xf8,0x43,0x4a, + }; + const uint8_t d384_0[] = { /* SHA3-384(0-bit) */ + 0x0c,0x63,0xa7,0x5b,0x84,0x5e,0x4f,0x7d, + 0x01,0x10,0x7d,0x85,0x2e,0x4c,0x24,0x85, + 0xc5,0x1a,0x50,0xaa,0xaa,0x94,0xfc,0x61, + 0x99,0x5e,0x71,0xbb,0xee,0x98,0x3a,0x2a, + 0xc3,0x71,0x38,0x31,0x26,0x4a,0xdb,0x47, + 0xfb,0x6b,0xd1,0xe0,0x58,0xd5,0xf0,0x04, + }; + const uint8_t d512_0[] = { /* SHA3-512(0-bit) */ + 0xa6,0x9f,0x73,0xcc,0xa2,0x3a,0x9a,0xc5, + 0xc8,0xb5,0x67,0xdc,0x18,0x5a,0x75,0x6e, + 0x97,0xc9,0x82,0x16,0x4f,0xe2,0x58,0x59, + 0xe0,0xd1,0xdc,0xc1,0x47,0x5c,0x80,0xa6, + 0x15,0xb2,0x12,0x3a,0xf1,0xf5,0xf9,0x4c, + 0x11,0xe3,0xe9,0x40,0x2c,0x3a,0xc5,0x58, + 0xf5,0x00,0x19,0x9d,0x95,0xb6,0xd3,0xe3, + 0x01,0x75,0x85,0x86,0x28,0x1d,0xcd,0x26, + }; + const uint8_t shake128_0_41[] = { /* SHAKE128(0-bit, 41) */ + 0x7f,0x9c,0x2b,0xa4,0xe8,0x8f,0x82,0x7d, + 0x61,0x60,0x45,0x50,0x76,0x05,0x85,0x3e, + 0xd7,0x3b,0x80,0x93,0xf6,0xef,0xbc,0x88, + 0xeb,0x1a,0x6e,0xac,0xfa,0x66,0xef,0x26, + 0x3c,0xb1,0xee,0xa9,0x88,0x00,0x4b,0x93,0x10, + }; + const uint8_t shake256_0_73[] = { /* SHAKE256(0-bit, 73) */ + 0x46,0xb9,0xdd,0x2b,0x0b,0xa8,0x8d,0x13, + 0x23,0x3b,0x3f,0xeb,0x74,0x3e,0xeb,0x24, + 0x3f,0xcd,0x52,0xea,0x62,0xb8,0x1b,0x82, + 0xb5,0x0c,0x27,0x64,0x6e,0xd5,0x76,0x2f, + 0xd7,0x5d,0xc4,0xdd,0xd8,0xc0,0xf2,0x00, + 0xcb,0x05,0x01,0x9d,0x67,0xb5,0x92,0xf6, + 0xfc,0x82,0x1c,0x49,0x47,0x9a,0xb4,0x86, + 0x40,0x29,0x2e,0xac,0xb3,0xb7,0xc4,0xbe, + 0x14,0x1e,0x96,0x61,0x6f,0xb1,0x39,0x57,0x69, + }; + const uint8_t d224_1600[] = { /* SHA3-224(200 * 0xa3) */ + 0x93,0x76,0x81,0x6a,0xba,0x50,0x3f,0x72, + 0xf9,0x6c,0xe7,0xeb,0x65,0xac,0x09,0x5d, + 0xee,0xe3,0xbe,0x4b,0xf9,0xbb,0xc2,0xa1, + 0xcb,0x7e,0x11,0xe0, + }; + const uint8_t d256_1600[] = { /* SHA3-256(200 * 0xa3) */ + 0x79,0xf3,0x8a,0xde,0xc5,0xc2,0x03,0x07, + 0xa9,0x8e,0xf7,0x6e,0x83,0x24,0xaf,0xbf, + 0xd4,0x6c,0xfd,0x81,0xb2,0x2e,0x39,0x73, + 0xc6,0x5f,0xa1,0xbd,0x9d,0xe3,0x17,0x87, + }; + const uint8_t d384_1600[] = { /* SHA3-384(200 * 0xa3) */ + 0x18,0x81,0xde,0x2c,0xa7,0xe4,0x1e,0xf9, + 0x5d,0xc4,0x73,0x2b,0x8f,0x5f,0x00,0x2b, + 0x18,0x9c,0xc1,0xe4,0x2b,0x74,0x16,0x8e, + 0xd1,0x73,0x26,0x49,0xce,0x1d,0xbc,0xdd, + 0x76,0x19,0x7a,0x31,0xfd,0x55,0xee,0x98, + 0x9f,0x2d,0x70,0x50,0xdd,0x47,0x3e,0x8f, + }; + const uint8_t d512_1600[] = { /* SHA3-512(200 * 0xa3) */ + 0xe7,0x6d,0xfa,0xd2,0x20,0x84,0xa8,0xb1, + 0x46,0x7f,0xcf,0x2f,0xfa,0x58,0x36,0x1b, + 0xec,0x76,0x28,0xed,0xf5,0xf3,0xfd,0xc0, + 0xe4,0x80,0x5d,0xc4,0x8c,0xae,0xec,0xa8, + 0x1b,0x7c,0x13,0xc3,0x0a,0xdf,0x52,0xa3, + 0x65,0x95,0x84,0x73,0x9a,0x2d,0xf4,0x6b, + 
0xe5,0x89,0xc5,0x1c,0xa1,0xa4,0xa8,0x41, + 0x6d,0xf6,0x54,0x5a,0x1c,0xe8,0xba,0x00, + }; + const uint8_t shake128_1600_41[] = { /* SHAKE128(200 * 0xa3, 41) */ + 0x13,0x1a,0xb8,0xd2,0xb5,0x94,0x94,0x6b, + 0x9c,0x81,0x33,0x3f,0x9b,0xb6,0xe0,0xce, + 0x75,0xc3,0xb9,0x31,0x04,0xfa,0x34,0x69, + 0xd3,0x91,0x74,0x57,0x38,0x5d,0xa0,0x37, + 0xcf,0x23,0x2e,0xf7,0x16,0x4a,0x6d,0x1e,0xb4, + }; + const uint8_t shake256_1600_73[] = { /* SHAKE256(200 * 0xa3, 73) */ + 0xcd,0x8a,0x92,0x0e,0xd1,0x41,0xaa,0x04, + 0x07,0xa2,0x2d,0x59,0x28,0x86,0x52,0xe9, + 0xd9,0xf1,0xa7,0xee,0x0c,0x1e,0x7c,0x1c, + 0xa6,0x99,0x42,0x4d,0xa8,0x4a,0x90,0x4d, + 0x2d,0x70,0x0c,0xaa,0xe7,0x39,0x6e,0xce, + 0x96,0x60,0x44,0x40,0x57,0x7d,0xa4,0xf3, + 0xaa,0x22,0xae,0xb8,0x85,0x7f,0x96,0x1c, + 0x4c,0xd8,0xe0,0x6f,0x0a,0xe6,0x61,0x0b, + 0x10,0x48,0xa7,0xf6,0x4e,0x10,0x74,0xcd,0x62, + }; + const uint8_t d0[] = { + 0x6c,0x02,0x1a,0xc6,0x65,0xaf,0x80,0xfb, + 0x52,0xe6,0x2d,0x27,0xe5,0x02,0x88,0x84, + 0xec,0x1c,0x0c,0xe7,0x0b,0x94,0x55,0x83, + 0x19,0xf2,0xbf,0x09,0x86,0xeb,0x1a,0xbb, + 0xc3,0x0d,0x1c,0xef,0x22,0xfe,0xc5,0x4c, + 0x45,0x90,0x66,0x14,0x00,0x6e,0xc8,0x79, + 0xdf,0x1e,0x02,0xbd,0x75,0xe9,0x60,0xd8, + 0x60,0x39,0x85,0xc9,0xc4,0xee,0x33,0xab, + }; + const unsigned mlen[6] = { 0, 3, 128, 129, 255, 1024 }; + uint8_t m[1024], d[73]; + SHA3_224_CTX sha3224; + SHA3_256_CTX sha3256; + SHA3_384_CTX sha3384; + SHA3_512_CTX sha3512; + SHAKE128_CTX shake128; + SHAKE256_CTX shake256; + SHA3_512_CTX ctx; + unsigned mi; + + /* + * NIST test vectors from + * : + * 0-bit, 1600-bit repeated 0xa3 (= 0b10100011). + */ + SHA3_224_Init(&sha3224); + SHA3_224_Final(d, &sha3224); + if (memcmp(d, d224_0, 28) != 0) + return -1; + SHA3_256_Init(&sha3256); + SHA3_256_Final(d, &sha3256); + if (memcmp(d, d256_0, 32) != 0) + return -1; + SHA3_384_Init(&sha3384); + SHA3_384_Final(d, &sha3384); + if (memcmp(d, d384_0, 48) != 0) + return -1; + SHA3_512_Init(&sha3512); + SHA3_512_Final(d, &sha3512); + if (memcmp(d, d512_0, 64) != 0) + return -1; + SHAKE128_Init(&shake128); + SHAKE128_Final(d, 41, &shake128); + if (memcmp(d, shake128_0_41, 41) != 0) + return -1; + SHAKE256_Init(&shake256); + SHAKE256_Final(d, 73, &shake256); + if (memcmp(d, shake256_0_73, 73) != 0) + return -1; + + (void)memset(m, 0xa3, 200); + SHA3_224_Init(&sha3224); + SHA3_224_Update(&sha3224, m, 200); + SHA3_224_Final(d, &sha3224); + if (memcmp(d, d224_1600, 28) != 0) + return -1; + SHA3_256_Init(&sha3256); + SHA3_256_Update(&sha3256, m, 200); + SHA3_256_Final(d, &sha3256); + if (memcmp(d, d256_1600, 32) != 0) + return -1; + SHA3_384_Init(&sha3384); + SHA3_384_Update(&sha3384, m, 200); + SHA3_384_Final(d, &sha3384); + if (memcmp(d, d384_1600, 48) != 0) + return -1; + SHA3_512_Init(&sha3512); + SHA3_512_Update(&sha3512, m, 200); + SHA3_512_Final(d, &sha3512); + if (memcmp(d, d512_1600, 64) != 0) + return -1; + SHAKE128_Init(&shake128); + SHAKE128_Update(&shake128, m, 200); + SHAKE128_Final(d, 41, &shake128); + if (memcmp(d, shake128_1600_41, 41) != 0) + return -1; + SHAKE256_Init(&shake256); + SHAKE256_Update(&shake256, m, 200); + SHAKE256_Final(d, 73, &shake256); + if (memcmp(d, shake256_1600_73, 73) != 0) + return -1; + + /* + * Hand-crufted test vectors with unaligned message lengths. 
+ */ + SHA3_512_Init(&ctx); + for (mi = 0; mi < 6; mi++) { + sha3_selftest_prng(m, mlen[mi], (224/8)*mlen[mi]); + SHA3_224_Init(&sha3224); + SHA3_224_Update(&sha3224, m, mlen[mi]); + SHA3_224_Final(d, &sha3224); + SHA3_512_Update(&ctx, d, 224/8); + } + for (mi = 0; mi < 6; mi++) { + sha3_selftest_prng(m, mlen[mi], (256/8)*mlen[mi]); + SHA3_256_Init(&sha3256); + SHA3_256_Update(&sha3256, m, mlen[mi]); + SHA3_256_Final(d, &sha3256); + SHA3_512_Update(&ctx, d, 256/8); + } + for (mi = 0; mi < 6; mi++) { + sha3_selftest_prng(m, mlen[mi], (384/8)*mlen[mi]); + SHA3_384_Init(&sha3384); + SHA3_384_Update(&sha3384, m, mlen[mi]); + SHA3_384_Final(d, &sha3384); + SHA3_512_Update(&ctx, d, 384/8); + } + for (mi = 0; mi < 6; mi++) { + sha3_selftest_prng(m, mlen[mi], (512/8)*mlen[mi]); + SHA3_512_Init(&sha3512); + SHA3_512_Update(&sha3512, m, mlen[mi]); + SHA3_512_Final(d, &sha3512); + SHA3_512_Update(&ctx, d, 512/8); + } + SHA3_512_Final(d, &ctx); + if (memcmp(d, d0, 64) != 0) + return -1; + + return 0; +} diff --git a/bf/algo/sha3.h b/bf/algo/sha3.h new file mode 100644 index 0000000..8118d27 --- /dev/null +++ b/bf/algo/sha3.h @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 2015 Taylor R. Campbell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef SHA3_H +#define SHA3_H + +#include +#include + +struct sha3 { + uint64_t A[25]; + unsigned nb; /* number of bytes remaining to fill buffer */ +}; + +typedef struct { struct sha3 C224; } SHA3_224_CTX; +typedef struct { struct sha3 C256; } SHA3_256_CTX; +typedef struct { struct sha3 C384; } SHA3_384_CTX; +typedef struct { struct sha3 C512; } SHA3_512_CTX; +typedef struct { struct sha3 C128; } SHAKE128_CTX; +typedef struct { struct sha3 C256; } SHAKE256_CTX; + +#define SHA3_224_DIGEST_LENGTH 28 +#define SHA3_256_DIGEST_LENGTH 32 +#define SHA3_384_DIGEST_LENGTH 48 +#define SHA3_512_DIGEST_LENGTH 64 + +void SHA3_224_Init(SHA3_224_CTX *); +void SHA3_224_Update(SHA3_224_CTX *, const uint8_t *, size_t); +void SHA3_224_Final(uint8_t[SHA3_224_DIGEST_LENGTH], SHA3_224_CTX *); + +void SHA3_256_Init(SHA3_256_CTX *); +void SHA3_256_Update(SHA3_256_CTX *, const uint8_t *, size_t); +void SHA3_256_Final(uint8_t[SHA3_256_DIGEST_LENGTH], SHA3_256_CTX *); + +void SHA3_384_Init(SHA3_384_CTX *); +void SHA3_384_Update(SHA3_384_CTX *, const uint8_t *, size_t); +void SHA3_384_Final(uint8_t[SHA3_384_DIGEST_LENGTH], SHA3_384_CTX *); + +void SHA3_512_Init(SHA3_512_CTX *); +void SHA3_512_Update(SHA3_512_CTX *, const uint8_t *, size_t); +void SHA3_512_Final(uint8_t[SHA3_512_DIGEST_LENGTH], SHA3_512_CTX *); + +void SHAKE128_Init(SHAKE128_CTX *); +void SHAKE128_Update(SHAKE128_CTX *, const uint8_t *, size_t); +void SHAKE128_Final(uint8_t *, size_t, SHAKE128_CTX *); + +void SHAKE256_Init(SHAKE256_CTX *); +void SHAKE256_Update(SHAKE256_CTX *, const uint8_t *, size_t); +void SHAKE256_Final(uint8_t *, size_t, SHAKE256_CTX *); + +#define KECCAK_256_Init SHA3_256_Init +#define KECCAK_256_Update SHA3_256_Update +void KECCAK_256_Final(uint8_t[SHA3_256_DIGEST_LENGTH], SHA3_256_CTX *); + +#define KECCAK_384_Init SHA3_384_Init +#define KECCAK_384_Update SHA3_384_Update +void KECCAK_384_Final(uint8_t[SHA3_384_DIGEST_LENGTH], SHA3_384_CTX *); + +#define KECCAK_512_Init SHA3_512_Init +#define KECCAK_512_Update SHA3_512_Update +void KECCAK_512_Final(uint8_t[SHA3_512_DIGEST_LENGTH], SHA3_512_CTX *); + +int SHA3_Selftest(void); + +#endif /* SHA3_H */ diff --git a/bf/algo/warpwallet.c b/bf/algo/warpwallet.c new file mode 100644 index 0000000..37caac2 --- /dev/null +++ b/bf/algo/warpwallet.c @@ -0,0 +1,54 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +// crypto.h used for the version +#include + +#include "../scrypt-jane/scrypt-jane.h" + +#define _PBKDF2_i (1<<16) +#define _SCRYPT_N (1<<18) +#define _SCRYPT_r 8 +#define _SCRYPT_p 1 + +#define openssl_pbkdf2(p, pl, s, ss, k, ks) \ + PKCS5_PBKDF2_HMAC(p, pl, s, ss, _PBKDF2_i, EVP_sha256(), ks, k) + +/* +#define libscrypt(p, pl, s, ss, k, ks) \ + libscrypt_scrypt(p, pl, s, ss, _SCRYPT_N, _SCRYPT_r, _SCRYPT_p, k, ks) +*/ + +#define jane_scrypt(p, pl, s, ss, k, ks) \ + scrypt(p, pl, s, ss, 17, 3, 0, k, ks) + +int warpwallet(unsigned char *pass, size_t pass_sz, + unsigned char *salt, size_t salt_sz, + unsigned char *out) { + unsigned char seed1[32], seed2[32]; + + int i, seed_sz = 32; + + pass[pass_sz] = salt[salt_sz] = 1; + //if ((ret = libscrypt(pass, pass_sz+1, salt, salt_sz+1, seed1, seed_sz)) != 0) return ret; + jane_scrypt(pass, pass_sz+1, salt, salt_sz+1, seed1, seed_sz); + + pass[pass_sz] = salt[salt_sz] = 2; + openssl_pbkdf2(pass, pass_sz+1, salt, salt_sz+1, seed2, seed_sz); + + // xor the scrypt and pbkdf2 output together + for 
(i = 0; i < 32; ++i) { out[i] = seed1[i] ^ seed2[i]; } + + return 0; +} diff --git a/bf/algo/warpwallet.h b/bf/algo/warpwallet.h new file mode 100644 index 0000000..bba78a3 --- /dev/null +++ b/bf/algo/warpwallet.h @@ -0,0 +1,8 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#ifndef __BRAINFLAYER_WARPWALLET_H_ +#define __BRAINFLAYER_WARPWALLET_H_ + +int warpwallet(unsigned char *, size_t, unsigned char *, size_t, unsigned char *); + +/* vim: set ts=2 sw=2 et ai si: */ +#endif /* __BRAINFLAYER_WARPWALLET_H_ */ diff --git a/bf/blfchk.c b/bf/blfchk.c new file mode 100644 index 0000000..c9ff686 --- /dev/null +++ b/bf/blfchk.c @@ -0,0 +1,72 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include /* for ntohl/htonl */ + +#include +#include +#include + +#include "hex.h" +#include "bloom.h" +#include "mmapf.h" +#include "hash160.h" +#include "hsearchf.h" + +int main(int argc, char **argv) { + int ret; + hash160_t hash; + char *line = NULL; + size_t line_sz = 0; + unsigned char buf[128]; + unsigned char *bloom, *bloomfile, *hashfile; + FILE *ifile = stdin, *ofile = stdout, *hfile = NULL; + mmapf_ctx bloom_mmapf; + + if (argc < 2 || argc > 3) { + fprintf(stderr, "Usage: %s BLOOM_FILTER_FILE HASH_FILE\n", argv[0]); + return 1; + } + + bloomfile = argv[1]; + + if ((ret = mmapf(&bloom_mmapf, bloomfile, BLOOM_SIZE, MMAPF_RNDRD)) != MMAPF_OKAY) { + fprintf(stderr, "failed to open bloom filter '%s': %s\n", bloomfile, mmapf_strerror(ret)); + return 1; + } else if (bloom_mmapf.mem == NULL) { + fprintf(stderr, "got NULL pointer trying to set up bloom filter\n"); + return 1; + } + + bloom = bloom_mmapf.mem; + + if (argc == 3) { + hashfile = argv[2]; + hfile = fopen(hashfile, "r"); + } + + while (getline(&line, &line_sz, ifile) > 0) { + unhex(line, strlen(line), hash.uc, sizeof(hash.uc)); + if (bloom_chk_hash160(bloom, hash.ul)) { + if (hfile && !hsearchf(hfile, &hash)) { + //fprintf(ofile, "%s (false positive)\n", hex(hash.uc, sizeof(hash.uc), buf, sizeof(buf))); + continue; + } + //fprintf(ofile, "%s\n", hex(hash.uc, sizeof(hash.uc), buf, sizeof(buf))); + fprintf(ofile, "%s", line); + } + } + + return 0; +} + +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/bloom.c b/bf/bloom.c new file mode 100644 index 0000000..7d863a9 --- /dev/null +++ b/bf/bloom.c @@ -0,0 +1,43 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bloom.h" +#include "mmapf.h" + +void bloom_set_hash160(unsigned char *bloom, uint32_t *h) { + unsigned int t; + t = BH00(h); BLOOM_SET_BIT(t); + t = BH01(h); BLOOM_SET_BIT(t); + t = BH02(h); BLOOM_SET_BIT(t); + t = BH03(h); BLOOM_SET_BIT(t); + t = BH04(h); BLOOM_SET_BIT(t); + t = BH05(h); BLOOM_SET_BIT(t); + t = BH06(h); BLOOM_SET_BIT(t); + t = BH07(h); BLOOM_SET_BIT(t); + t = BH08(h); BLOOM_SET_BIT(t); + t = BH09(h); BLOOM_SET_BIT(t); + t = BH10(h); BLOOM_SET_BIT(t); + t = BH11(h); BLOOM_SET_BIT(t); + t = BH12(h); BLOOM_SET_BIT(t); + t = BH13(h); BLOOM_SET_BIT(t); + t = BH14(h); BLOOM_SET_BIT(t); + t = BH15(h); BLOOM_SET_BIT(t); + t = BH16(h); BLOOM_SET_BIT(t); + t = BH17(h); BLOOM_SET_BIT(t); + t = BH18(h); BLOOM_SET_BIT(t); + t = BH19(h); BLOOM_SET_BIT(t); +} + +/* +int bloom_save(unsigned char *filename, unsigned char *bloom); +*/ + +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/bloom.h b/bf/bloom.h new file mode 100644 index 
0000000..70eefa4 --- /dev/null +++ b/bf/bloom.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#ifndef __BRAINFLAYER_BLOOM_H_ +#define __BRAINFLAYER_BLOOM_H_ + +#include + +/* 2^32 bits */ +#define BLOOM_SIZE (512*1024*1024) + +#define BLOOM_SET_BIT(N) (bloom[(N)>>3] = bloom[(N)>>3] | (1<<((N)&7))) +#define BLOOM_GET_BIT(N) ( ( bloom[(N)>>3]>>((N)&7) )&1) + +#define BH00(N) (N[0]) +#define BH01(N) (N[1]) +#define BH02(N) (N[2]) +#define BH03(N) (N[3]) +#define BH04(N) (N[4]) + +#define BH05(N) (N[0]<<16|N[1]>>16) +#define BH06(N) (N[1]<<16|N[2]>>16) +#define BH07(N) (N[2]<<16|N[3]>>16) +#define BH08(N) (N[3]<<16|N[4]>>16) +#define BH09(N) (N[4]<<16|N[0]>>16) + +#define BH10(N) (N[0]<< 8|N[1]>>24) +#define BH11(N) (N[1]<< 8|N[2]>>24) +#define BH12(N) (N[2]<< 8|N[3]>>24) +#define BH13(N) (N[3]<< 8|N[4]>>24) +#define BH14(N) (N[4]<< 8|N[0]>>24) + +#define BH15(N) (N[0]<<24|N[1]>> 8) +#define BH16(N) (N[1]<<24|N[2]>> 8) +#define BH17(N) (N[2]<<24|N[3]>> 8) +#define BH18(N) (N[3]<<24|N[4]>> 8) +#define BH19(N) (N[4]<<24|N[0]>> 8) + +void bloom_set_hash160(unsigned char *, uint32_t *); +inline unsigned int bloom_chk_hash160(unsigned char *bloom, uint32_t *h) { + unsigned int t; + t = BH00(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH01(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH02(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH03(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH04(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH05(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH06(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH07(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH08(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH09(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH10(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH11(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH12(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH13(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH14(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH15(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH16(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH17(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH18(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + t = BH19(h); if (BLOOM_GET_BIT(t) == 0) { return 0; } + return 1; +} + +//#define bloom_new() malloc(536870912) +//unsigned char * bloom_load(unsigned char *); +//int bloom_save(unsigned char *, unsigned char *); + +/* vim: set ts=2 sw=2 et ai si: */ +#endif /* __BRAINFLAYER_BLOOM_H_ */ diff --git a/bf/brainflayer.c b/bf/brainflayer.c new file mode 100644 index 0000000..ab1449f --- /dev/null +++ b/bf/brainflayer.c @@ -0,0 +1,880 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "ripemd160_256.h" + +#include "ec_pubkey_fast.h" + +#include "hex.h" +#include "bloom.h" +#include "mmapf.h" +#include "hash160.h" +#include "hsearchf.h" + +#include "algo/brainv2.h" +#include "algo/warpwallet.h" +#include "algo/brainwalletio.h" +#include "algo/sha3.h" + +// raise this if you really want, but quickly diminishing returns +#define BATCH_MAX 4096 + +static int brainflayer_is_init = 0; + +typedef struct pubhashfn_s { + void (*fn)(hash160_t *, const unsigned char *); + char id; +} pubhashfn_t; + +static unsigned char *mem; + +static mmapf_ctx bloom_mmapf; +static unsigned char 
*bloom = NULL; + +static unsigned char *unhexed = NULL; +static size_t unhexed_sz = 4096; + +#define bail(code, ...) \ + do { \ + fprintf(stderr, __VA_ARGS__); \ + exit(code); \ + } while (0) + +#define chkmalloc(S) _chkmalloc(S, __FILE__, __LINE__) +static void * _chkmalloc(size_t size, unsigned char *file, unsigned int line) { + void *ptr = malloc(size); + if (ptr == NULL) { + bail(1, "malloc(%zu) failed at %s:%u: %s\n", size, file, line, strerror(errno)); + } + return ptr; +} + +#define chkrealloc(P, S) _chkrealloc(P, S, __FILE__, __LINE__); +static void * _chkrealloc(void *ptr, size_t size, unsigned char *file, unsigned int line) { + void *ptr2 = realloc(ptr, size); + if (ptr2 == NULL) { + bail(1, "realloc(%p, %zu) failed at %s:%u: %s\n", ptr, size, file, line, strerror(errno)); + } + return ptr2; +} + +uint64_t getns() { + uint64_t ns; + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + ns = ts.tv_nsec; + ns += ts.tv_sec * 1000000000ULL; + return ns; +} + +static inline void brainflayer_init_globals() { + /* only initialize stuff once */ + if (!brainflayer_is_init) { + /* initialize buffers */ + mem = chkmalloc(4096); + unhexed = chkmalloc(unhexed_sz); + + /* set the flag */ + brainflayer_is_init = 1; + } +} + +// function pointers +static int (*input2priv)(unsigned char *, unsigned char *, size_t); + +/* bitcoin uncompressed address */ +static void uhash160(hash160_t *h, const unsigned char *upub) { + SHA256_CTX ctx; + unsigned char hash[SHA256_DIGEST_LENGTH]; + + SHA256_Init(&ctx); + SHA256_Update(&ctx, upub, 65); + SHA256_Final(hash, &ctx); + ripemd160_256(hash, h->uc); +} + +/* bitcoin compressed address */ +static void chash160(hash160_t *h, const unsigned char *upub) { + SHA256_CTX ctx; + unsigned char cpub[33]; + unsigned char hash[SHA256_DIGEST_LENGTH]; + + /* quick and dirty public key compression */ + cpub[0] = 0x02 | (upub[64] & 0x01); + memcpy(cpub + 1, upub + 1, 32); + SHA256_Init(&ctx); + SHA256_Update(&ctx, cpub, 33); + SHA256_Final(hash, &ctx); + ripemd160_256(hash, h->uc); +} + +/* ethereum address */ +static void ehash160(hash160_t *h, const unsigned char *upub) { + SHA3_256_CTX ctx; + unsigned char hash[SHA256_DIGEST_LENGTH]; + + /* compute hash160 for uncompressed public key */ + /* keccak_256_last160(pub) */ + KECCAK_256_Init(&ctx); + KECCAK_256_Update(&ctx, upub+1, 64); + KECCAK_256_Final(hash, &ctx); + memcpy(h->uc, hash+12, 20); +} + +/* msb of x coordinate of public key */ +static void xhash160(hash160_t *h, const unsigned char *upub) { + memcpy(h->uc, upub+1, 20); +} + + + +static int pass2priv(unsigned char *priv, unsigned char *pass, size_t pass_sz) { + SHA256_CTX ctx; + + SHA256_Init(&ctx); + SHA256_Update(&ctx, pass, pass_sz); + SHA256_Final(priv, &ctx); + + return 0; +} + +static int keccak2priv(unsigned char *priv, unsigned char *pass, size_t pass_sz) { + SHA3_256_CTX ctx; + + KECCAK_256_Init(&ctx); + KECCAK_256_Update(&ctx, pass, pass_sz); + KECCAK_256_Final(priv, &ctx); + + return 0; +} + +/* ether.camp "2031 passes of SHA-3 (Keccak)" */ +static int camp2priv(unsigned char *priv, unsigned char *pass, size_t pass_sz) { + SHA3_256_CTX ctx; + int i; + + KECCAK_256_Init(&ctx); + KECCAK_256_Update(&ctx, pass, pass_sz); + KECCAK_256_Final(priv, &ctx); + + for (i = 1; i < 2031; ++i) { + KECCAK_256_Init(&ctx); + KECCAK_256_Update(&ctx, priv, 32); + KECCAK_256_Final(priv, &ctx); + } + + return 0; +} + +static int sha32priv(unsigned char *priv, unsigned char *pass, size_t pass_sz) { + SHA3_256_CTX ctx; + + SHA3_256_Init(&ctx); + SHA3_256_Update(&ctx, 
pass, pass_sz); + SHA3_256_Final(priv, &ctx); + + return 0; +} + +/* + static int dicap2hash160(unsigned char *pass, size_t pass_sz) { + SHA3_256_CTX ctx; + + int i, ret; + + KECCAK_256_Init(&ctx); + KECCAK_256_Update(&ctx, pass, pass_sz); + KECCAK_256_Final(priv256, &ctx); + for (i = 0; i < 16384; ++i) { + KECCAK_256_Init(&ctx); + KECCAK_256_Update(&ctx, priv256, 32); + KECCAK_256_Final(priv256, &ctx); + } + + for (;;) { + ret = priv2hash160(priv256); + if (hash160_uncmp.uc[0] == 0) { break; } + KECCAK_256_Init(&ctx); + KECCAK_256_Update(&ctx, priv256, 32); + KECCAK_256_Final(priv256, &ctx); + } + return ret; + } +*/ + +static int rawpriv2priv(unsigned char *priv, unsigned char *rawpriv, size_t rawpriv_sz) { + memcpy(priv, rawpriv, rawpriv_sz); + return 0; +} + +static unsigned char *kdfsalt; +static size_t kdfsalt_sz; + +static int warppass2priv(unsigned char *priv, unsigned char *pass, size_t pass_sz) { + int ret; + if ((ret = warpwallet(pass, pass_sz, kdfsalt, kdfsalt_sz, priv)) != 0) return ret; + pass[pass_sz] = 0; + return 0; +} + +static int bwiopass2priv(unsigned char *priv, unsigned char *pass, size_t pass_sz) { + int ret; + if ((ret = brainwalletio(pass, pass_sz, kdfsalt, kdfsalt_sz, priv)) != 0) return ret; + pass[pass_sz] = 0; + return 0; +} + +static int brainv2pass2priv(unsigned char *priv, unsigned char *pass, size_t pass_sz) { + unsigned char hexout[33]; + int ret; + if ((ret = brainv2(pass, pass_sz, kdfsalt, kdfsalt_sz, hexout)) != 0) return ret; + pass[pass_sz] = 0; + return pass2priv(priv, hexout, sizeof(hexout)-1); +} + +static unsigned char *kdfpass; +static size_t kdfpass_sz; + +static int warpsalt2priv(unsigned char *priv, unsigned char *salt, size_t salt_sz) { + int ret; + if ((ret = warpwallet(kdfpass, kdfpass_sz, salt, salt_sz, priv)) != 0) return ret; + salt[salt_sz] = 0; + return 0; +} + +static int bwiosalt2priv(unsigned char *priv, unsigned char *salt, size_t salt_sz) { + int ret; + if ((ret = brainwalletio(kdfpass, kdfpass_sz, salt, salt_sz, priv)) != 0) return ret; + salt[salt_sz] = 0; + return 0; +} + +static int brainv2salt2priv(unsigned char *priv, unsigned char *salt, size_t salt_sz) { + unsigned char hexout[33]; + int ret; + if ((ret = brainv2(kdfpass, kdfpass_sz, salt, salt_sz, hexout)) != 0) return ret; + salt[salt_sz] = 0; + return pass2priv(priv, hexout, sizeof(hexout)-1); +} + +static unsigned char rushchk[5]; +static int rush2priv(unsigned char *priv, unsigned char *pass, size_t pass_sz) { + SHA256_CTX ctx; + unsigned char hash[SHA256_DIGEST_LENGTH]; + unsigned char userpasshash[SHA256_DIGEST_LENGTH*2+1]; + + SHA256_Init(&ctx); + SHA256_Update(&ctx, pass, pass_sz); + SHA256_Final(hash, &ctx); + + hex(hash, sizeof(hash), userpasshash, sizeof(userpasshash)); + + SHA256_Init(&ctx); + // kdfsalt should be the fragment up to the ! 
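+  // i.e. priv = SHA256(fragment-up-to-'!' || hex(SHA256(password))); the ten hex
+  // chars after the '!' are decoded into rushchk and checked as a 5-byte checksum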
+ SHA256_Update(&ctx, kdfsalt, kdfsalt_sz); + SHA256_Update(&ctx, userpasshash, 64); + SHA256_Final(priv, &ctx); + + // early exit if the checksum doesn't match + if (memcmp(priv, rushchk, sizeof(rushchk)) != 0) { return -1; } + + return 0; +} + +inline static int priv_incr(unsigned char *upub, unsigned char *priv) { + int sz; + + secp256k1_ec_pubkey_incr(upub, &sz, priv); + + return 0; +} + +inline static void priv2pub(unsigned char *upub, const unsigned char *priv) { + int sz; + + secp256k1_ec_pubkey_create_precomp(upub, &sz, priv); +} + + +inline static void fprintresult(FILE *f, hash160_t *hash, + unsigned char compressed, + unsigned char *type, + unsigned char *input) { + unsigned char hexed0[41]; + + fprintf(f, "%s:%c:%s:%s\n", + hex(hash->uc, 20, hexed0, sizeof(hexed0)), + compressed, + type, + input); +} + +struct { + float alpha, ilines_rate, ilines_rate_avg; + uint64_t report_mask; + uint64_t time_last, time_curr, time_delta; + uint64_t time_start, time_elapsed; + uint64_t ilines_last, ilines_curr, ilines_delta; + uint64_t olines; + int batch_stopped; +} stats; + +void gen_stats(void) { + stats.time_curr = getns(); + stats.time_delta = stats.time_curr - stats.time_last; + stats.time_elapsed = stats.time_curr - stats.time_start; + stats.time_last = stats.time_curr; + stats.ilines_delta = stats.ilines_curr - stats.ilines_last; + stats.ilines_last = stats.ilines_curr; + stats.ilines_rate = (stats.ilines_delta * 1.0e9) / (stats.time_delta * 1.0); + + if (stats.batch_stopped < BATCH_MAX) { + /* report overall average on last status update */ + stats.ilines_rate_avg = (stats.ilines_curr * 1.0e9) / (stats.time_elapsed * 1.0); + } else if (stats.ilines_rate_avg < 0) { + stats.ilines_rate_avg = stats.ilines_rate; + /* target reporting frequency to about once every five seconds */ + } else if (stats.time_delta < 2500000000) { + stats.report_mask = (stats.report_mask << 1) | 1; + stats.ilines_rate_avg = stats.ilines_rate; /* reset EMA */ + } else if (stats.time_delta > 10000000000) { + stats.report_mask >>= 1; + stats.ilines_rate_avg = stats.ilines_rate; /* reset EMA */ + } else { + /* exponetial moving average */ + stats.ilines_rate_avg = stats.alpha + * stats.ilines_rate + + (1 - stats.alpha) + * stats.ilines_rate_avg; + } +} + +void print_stats(void) { + fprintf(stderr, + "rate: %9.2f p/s" + " found: %5zu/%-10zu" + " elapsed: %8.3f s", + stats.ilines_rate_avg, + stats.olines, + stats.ilines_curr, + stats.time_elapsed / 1.0e9 + ); + + fflush(stderr); +} + +void init_stats(void) { + stats.time_start = stats.time_last = getns(); + stats.olines = stats.ilines_last = stats.ilines_curr = 0; + stats.ilines_rate_avg = -1; + stats.alpha = 0.500; + stats.report_mask = 0; + stats.batch_stopped = -1; +} + +void sig_stats(int signo){ + if(signo == SIGUSR1) print_stats(); +} + +void usage(unsigned char *name) { + printf("Usage: %s [OPTION]...\n\n\ + -a open output file in append mode\n\ + -b FILE check for matches against bloom filter FILE\n\ + -f FILE verify matches against sorted hash160s in FILE\n\ + -i FILE read from FILE instead of stdin\n\ + -o FILE write to FILE instead of stdout\n\ + -c TYPES use TYPES for public key to hash160 computation\n\ + multiple can be specified, for example the default\n\ + is 'uc', which will check for both uncompressed\n\ + and compressed addresses using Bitcoin's algorithm\n\ + u - uncompressed address\n\ + c - compressed address\n\ + e - ethereum address\n\ + x - most signifigant bits of x coordinate\n\ + -t TYPE inputs are TYPE - supported types:\n\ + sha256 
(default) - classic brainwallet\n\ + sha3 - sha3-256\n\ + priv - raw private keys (requires -x)\n\ + warp - WarpWallet (supports -s or -p)\n\ + bwio - brainwallet.io (supports -s or -p)\n\ + bv2 - brainv2 (supports -s or -p) VERY SLOW\n\ + rush - rushwallet (requires -r) FAST\n\ + keccak - keccak256 (ethercamp/old ethaddress)\n\ + camp2 - keccak256 * 2031 (new ethercamp)\n\ + -x treat input as hex encoded\n\ + -s SALT use SALT for salted input types (default: none)\n\ + -p PASSPHRASE use PASSPHRASE for salted input types, inputs\n\ + will be treated as salts\n\ + -r FRAGMENT use FRAGMENT for cracking rushwallet passphrase\n\ + -I HEXPRIVKEY incremental private key cracking mode, starting\n\ + at HEXPRIVKEY (supports -n) FAST\n\ + -k K skip the first K lines of input\n\ + -n K/N use only the Kth of every N input lines\n\ + -B batch size for affine transformations\n\ + must be a power of 2 (default/max: %d)\n\ + -w WINDOW_SIZE window size for ecmult table (default: 16)\n\ + uses about 3 * 2^w KiB memory on startup, but\n\ + only about 2^w KiB once the table is built\n\ + -m FILE load ecmult table from FILE\n\ + the ecmtabgen tool can build such a table\n\ + -v verbose - display cracking progress\n\ + -h show this help\n", name, BATCH_MAX); + //q, --quiet suppress non-error messages + exit(1); +} + +int main(int argc, char **argv) { + FILE *ifile = stdin; + FILE *ofile = stdout; + FILE *ffile = NULL; + + int ret, c, i, j; + + int64_t raw_lines = -1; + + int skipping = 0, tty = 0; + + unsigned char modestr[64]; + + int spok = 0, aopt = 0, vopt = 0, wopt = 16, xopt = 0; + int nopt_mod = 0, nopt_rem = 0, Bopt = 0; + uint64_t kopt = 0; + unsigned char *bopt = NULL, *iopt = NULL, *oopt = NULL; + unsigned char *topt = NULL, *sopt = NULL, *popt = NULL; + unsigned char *mopt = NULL, *fopt = NULL, *ropt = NULL; + unsigned char *Iopt = NULL, *copt = NULL; + + unsigned char priv[64]; + hash160_t hash160; + pubhashfn_t pubhashfn[8]; + memset(pubhashfn, 0, sizeof(pubhashfn)); + + int batch_stopped = -1; + char *batch_line[BATCH_MAX]; + size_t batch_line_sz[BATCH_MAX]; + int batch_line_read[BATCH_MAX]; + unsigned char batch_priv[BATCH_MAX][32]; + unsigned char batch_upub[BATCH_MAX][65]; + + while ((c = getopt(argc, argv, "avxb:hi:k:f:m:n:o:p:s:r:c:t:w:I:B:")) != -1) { + switch (c) { + case 'a': + aopt = 1; // open output file in append mode + break; + case 'k': + kopt = strtoull(optarg, NULL, 10); // skip first k lines of input + skipping = 1; + break; + case 'n': + // only try the rem'th of every mod lines (one indexed) + nopt_rem = atoi(optarg) - 1; + optarg = strchr(optarg, '/'); + if (optarg != NULL) { nopt_mod = atoi(optarg+1); } + skipping = 1; + break; + case 'B': + Bopt = atoi(optarg); + break; + case 'w': + if (wopt > 1) wopt = atoi(optarg); + break; + case 'm': + mopt = optarg; // table file + wopt = 1; // auto + break; + case 'v': + vopt = 1; // verbose + break; + case 'b': + bopt = optarg; // bloom filter file + break; + case 'f': + fopt = optarg; // full filter file + break; + case 'i': + iopt = optarg; // input file + break; + case 'o': + oopt = optarg; // output file + break; + case 'x': + xopt = 1; // input is hex encoded + break; + case 's': + sopt = optarg; // salt + break; + case 'p': + popt = optarg; // passphrase + break; + case 'r': + ropt = optarg; // rushwallet + break; + case 'c': + copt = optarg; // type of hash160 + break; + case 't': + topt = optarg; // type of input + break; + case 'I': + Iopt = optarg; // start key for incremental + xopt = 1; // input is hex encoded + break; + 
case 'h': + // show help + usage(argv[0]); + return 0; + case '?': + // show error + return 1; + default: + // should never be reached... + printf("got option '%c' (%d)\n", c, c); + return 1; + } + } + + if (optind < argc) { + if (optind == 1 && argc == 2) { + // older versions of brainflayer had the bloom filter file as a + // single optional argument, this keeps compatibility with that + bopt = argv[1]; + } else { + fprintf(stderr, "Invalid arguments:\n"); + while (optind < argc) { + fprintf(stderr, " '%s'\n", argv[optind++]); + } + exit(1); + } + } + + if (nopt_rem != 0 || nopt_mod != 0) { + // note that nopt_rem has had one subtracted at option parsing + if (nopt_rem >= nopt_mod) { + bail(1, "Invalid '-n' argument, remainder '%d' must be <= modulus '%d'\n", nopt_rem+1, nopt_mod); + } else if (nopt_rem < 0) { + bail(1, "Invalid '-n' argument, remainder '%d' must be > 0\n", nopt_rem+1); + } else if (nopt_mod < 1) { + bail(1, "Invalid '-n' argument, modulus '%d' must be > 0\n", nopt_mod); + } + } + + if (wopt < 1 || wopt > 28) { + bail(1, "Invalid window size '%d' - must be >= 1 and <= 28\n", wopt); + } else { + // very rough sanity check of window size + struct sysinfo info; + sysinfo(&info); + uint64_t sysram = info.mem_unit * info.totalram; + if (3584LLU*(1< sysram) { + bail(1, "Not enough ram for requested window size '%d'\n", wopt); + } + } + + if (Bopt) { // if unset, will be set later + if (Bopt < 1 || Bopt > BATCH_MAX) { + bail(1, "Invalid '-B' argument, batch size '%d' - must be >= 1 and <= %d\n", Bopt, BATCH_MAX); + } else if (Bopt & (Bopt - 1)) { // https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 + bail(1, "Invalid '-B' argument, batch size '%d' is not a power of 2\n", Bopt); + } + } + + if (Iopt) { + if (strlen(Iopt) != 64) { + bail(1, "The starting key passed to the '-I' must be 64 hex digits exactly\n"); + } + if (topt) { + bail(1, "Cannot specify input type in incremental mode\n"); + } + topt = "priv"; + // normally, getline would allocate the batch_line entries, but we need to + // do this to give the processing loop somewhere to write to in incr mode + for (i = 0; i < BATCH_MAX; ++i) { + batch_line[i] = Iopt; + } + unhex(Iopt, sizeof(priv)*2, priv, sizeof(priv)); + skipping = 1; + if (!nopt_mod) { nopt_mod = 1; }; + } + + + /* handle copt */ + if (copt == NULL) { copt = "uc"; } + i = 0; + while (copt[i]) { + switch (copt[i]) { + case 'u': + pubhashfn[i].fn = &uhash160; + break; + case 'c': + pubhashfn[i].fn = &chash160; + break; + case 'e': + pubhashfn[i].fn = &ehash160; + break; + case 'x': + pubhashfn[i].fn = &xhash160; + break; + default: + bail(1, "Unknown hash160 type '%c'.\n", copt[i]); + } + if (strchr(copt + i + 1, copt[i])) { + bail(1, "Duplicate hash160 type '%c'.\n", copt[i]); + } + pubhashfn[i].id = copt[i]; + ++i; + } + + /* handle topt */ + if (topt == NULL) { topt = "sha256"; } + + if (strcmp(topt, "sha256") == 0) { + input2priv = &pass2priv; + } else if (strcmp(topt, "priv") == 0) { + if (!xopt) { + bail(1, "raw private key input requires -x"); + } + input2priv = &rawpriv2priv; + } else if (strcmp(topt, "warp") == 0) { + if (!Bopt) { Bopt = 1; } // don't batch transform for slow input hashes by default + spok = 1; + input2priv = popt ? &warpsalt2priv : &warppass2priv; + } else if (strcmp(topt, "bwio") == 0) { + if (!Bopt) { Bopt = 1; } // don't batch transform for slow input hashes by default + spok = 1; + input2priv = popt ? 
&bwiosalt2priv : &bwiopass2priv; + } else if (strcmp(topt, "bv2") == 0) { + if (!Bopt) { Bopt = 1; } // don't batch transform for slow input hashes by default + spok = 1; + input2priv = popt ? &brainv2salt2priv : &brainv2pass2priv; + } else if (strcmp(topt, "rush") == 0) { + input2priv = &rush2priv; + } else if (strcmp(topt, "camp2") == 0) { + input2priv = &camp2priv; + } else if (strcmp(topt, "keccak") == 0) { + input2priv = &keccak2priv; + } else if (strcmp(topt, "sha3") == 0) { + input2priv = &sha32priv; + // } else if (strcmp(topt, "dicap") == 0) { + // input2priv = &dicap2priv; + } else { + bail(1, "Unknown input type '%s'.\n", topt); + } + + if (spok) { + if (sopt && popt) { + bail(1, "Cannot specify both a salt and a passphrase\n"); + } + if (popt) { + kdfpass = popt; + kdfpass_sz = strlen(popt); + } else { + if (sopt) { + kdfsalt = sopt; + kdfsalt_sz = strlen(kdfsalt); + } else { + kdfsalt = chkmalloc(0); + kdfsalt_sz = 0; + } + } + } else { + if (popt) { + bail(1, "Specifying a passphrase not supported with input type '%s'\n", topt); + } else if (sopt) { + bail(1, "Specifying a salt not supported with this input type '%s'\n", topt); + } + } + + if (ropt) { + if (input2priv != &rush2priv) { + bail(1, "Specifying a url fragment only supported with input type 'rush'\n"); + } + kdfsalt = ropt; + kdfsalt_sz = strlen(kdfsalt) - sizeof(rushchk)*2; + if (kdfsalt[kdfsalt_sz-1] != '!') { + bail(1, "Invalid rushwallet url fragment '%s'\n", kdfsalt); + } + unhex(kdfsalt+kdfsalt_sz, sizeof(rushchk)*2, rushchk, sizeof(rushchk)); + kdfsalt[kdfsalt_sz] = '\0'; + } else if (input2priv == &rush2priv) { + bail(1, "The '-r' option is required for rushwallet.\n"); + } + + snprintf(modestr, sizeof(modestr), xopt ? "(hex)%s" : "%s", topt); + + if (bopt) { + if ((ret = mmapf(&bloom_mmapf, bopt, BLOOM_SIZE, MMAPF_RNDRD)) != MMAPF_OKAY) { + bail(1, "failed to open bloom filter '%s': %s\n", bopt, mmapf_strerror(ret)); + } else if (bloom_mmapf.mem == NULL) { + bail(1, "got NULL pointer trying to set up bloom filter\n"); + } + bloom = bloom_mmapf.mem; + } + + if (fopt) { + if (!bopt) { + bail(1, "The '-f' option must be used with a bloom filter\n"); + } + if ((ffile = fopen(fopt, "r")) == NULL) { + bail(1, "failed to open '%s' for reading: %s\n", fopt, strerror(errno)); + } + } + + if (iopt) { + if ((ifile = fopen(iopt, "r")) == NULL) { + bail(1, "failed to open '%s' for reading: %s\n", iopt, strerror(errno)); + } + // increases readahead window, don't really care if it fails + posix_fadvise(fileno(ifile), 0, 0, POSIX_FADV_SEQUENTIAL); + } + + if (oopt && (ofile = fopen(oopt, (aopt ? 
"a" : "w"))) == NULL) { + bail(1, "failed to open '%s' for writing: %s\n", oopt, strerror(errno)); + } + + /* line buffer output */ + setvbuf(ofile, NULL, _IOLBF, 0); + /* line buffer stderr */ + setvbuf(stderr, NULL, _IOLBF, 0); + + if (vopt && ofile == stdout && isatty(fileno(stdout))) { tty = 1; } + + brainflayer_init_globals(); + + if (secp256k1_ec_pubkey_precomp_table(wopt, mopt) != 0) { + bail(1, "failed to initialize precomputed table\n"); + } + + if (secp256k1_ec_pubkey_batch_init(BATCH_MAX) != 0) { + bail(1, "failed to initialize batch point conversion structures\n"); + } + + if (vopt) { + init_stats(); + if(signal(SIGUSR1, sig_stats) == SIG_ERR){ + fprintf(stderr, "Can't set sig_stat"); + fflush(stderr); + } + } + + // set default batch size + if (!Bopt) { Bopt = BATCH_MAX; } + + for (;;) { + if (Iopt) { + if (skipping) { + priv_add_uint32(priv, nopt_rem + kopt); + skipping = 0; + } + secp256k1_ec_pubkey_batch_incr(Bopt, nopt_mod, batch_upub, batch_priv, priv); + memcpy(priv, batch_priv[Bopt-1], 32); + priv_add_uint32(priv, nopt_mod); + + batch_stopped = Bopt; + stats.batch_stopped = batch_stopped; + } else { + for (i = 0; i < Bopt; ++i) { + if ((batch_line_read[i] = getline(&batch_line[i], &batch_line_sz[i], ifile)-1) > -1) { + if (skipping) { + ++raw_lines; + if (kopt && raw_lines < kopt) { --i; continue; } + if (nopt_mod && raw_lines % nopt_mod != nopt_rem) { --i; continue; } + } + } else { + break; + } + batch_line[i][batch_line_read[i]] = 0; + if (xopt) { + if (batch_line_read[i] / 2 > unhexed_sz) { + unhexed_sz = batch_line_read[i]; + unhexed = chkrealloc(unhexed, unhexed_sz); + } + // rewrite the input line from hex + unhex(batch_line[i], batch_line_read[i], unhexed, unhexed_sz); + if (input2priv(batch_priv[i], unhexed, batch_line_read[i]/2) != 0) { + fprintf(stderr, "input2priv failed! continuing...\n"); + } + } else { + if (input2priv(batch_priv[i], batch_line[i], batch_line_read[i]) != 0) { + fprintf(stderr, "input2priv failed! 
continuing...\n"); + } + } + } + + // batch compute the public keys + secp256k1_ec_pubkey_batch_create(Bopt, batch_upub, batch_priv); + + // save ending value from read loop + batch_stopped = i; + stats.batch_stopped = i; + } + + // loop over the public keys + for (i = 0; i < batch_stopped; ++i) { + j = 0; + if (bloom) { /* crack mode */ + // loop over pubkey hash functions + while (pubhashfn[j].fn != NULL) { + pubhashfn[j].fn(&hash160, batch_upub[i]); + if (bloom_chk_hash160(bloom, hash160.ul)) { + if (!fopt || hsearchf(ffile, &hash160)) { + if (tty) { fprintf(ofile, "\033[0K"); } + // reformat/populate the line if required + if (Iopt) { + hex(batch_priv[i], 32, batch_line[i], 65); + } + fprintresult(ofile, &hash160, pubhashfn[j].id, modestr, batch_line[i]); + ++stats.olines; + } + } + ++j; + } + } else { /* generate mode */ + // reformat/populate the line if required + if (Iopt) { + hex(batch_priv[i], 32, batch_line[i], 65); + } + while (pubhashfn[j].fn != NULL) { + pubhashfn[j].fn(&hash160, batch_upub[i]); + fprintresult(ofile, &hash160, pubhashfn[j].id, modestr, batch_line[i]); + ++j; + } + } + } + // end public key processing loop + + // start stats + if (vopt) { + stats.ilines_curr += stats.batch_stopped; + if (stats.batch_stopped < BATCH_MAX || (stats.ilines_curr & stats.report_mask) == 0) { + gen_stats(); + } + } + // end stats + + // main loop exit condition + if (batch_stopped < Bopt) { + if (vopt) { fprintf(stderr, "\n"); } + break; + } + } + + return 0; +} + +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/crack.h b/bf/crack.h new file mode 100644 index 0000000..97e0a36 --- /dev/null +++ b/bf/crack.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#ifndef __BRAINFLAYER_CRACK_H_ +#define __BRAINFLAYER_CRACK_H_ + +#include +#include + +#define PWF_BUF_SZ 1024 + +typedef union hash160_u { + unsigned char uc[RIPEMD160_DIGEST_LENGTH]; + uint32_t ul[RIPEMD160_DIGEST_LENGTH>>2]; +} hash160_t; + +typedef struct keydata_u { + int state; + unsigned char[PWF_BUF_SZ] password; + unsigned char[SHA256_DIGEST_LENGTH] priv; + unsigned char[RIPEMD160_DIGEST_LENGTH] uaddr; + unsigned char[RIPEMD160_DIGEST_LENGTH] caddr; +} keydata_t; + +/* vim: set ts=2 sw=2 et ai si: */ +#endif /* __BRAINFLAYER_CRACK_H_ */ diff --git a/bf/dldummy.c b/bf/dldummy.c new file mode 100644 index 0000000..071df5f --- /dev/null +++ b/bf/dldummy.c @@ -0,0 +1,13 @@ +/* gets rid of warnings about opnessl using dlopen when statically linked */ + +#include + +static char *dlerrstr = "dynamic loader not available (using dldummy)"; + +//void * dlopen(const char *filename, int flat) { return NULL; } +void * dlopen(void) { return NULL; } +char * dlerror(void) { return dlerrstr; } +void * dlsym(void) { return NULL; } +int dlclose(void) { return -1; } +/* this is supposed to set some null pointers on error, but fuck it */ +int dladdr(void) { return -1; } diff --git a/bf/ec_pubkey_fast.c b/bf/ec_pubkey_fast.c new file mode 100644 index 0000000..6936d6b --- /dev/null +++ b/bf/ec_pubkey_fast.c @@ -0,0 +1,539 @@ +/* Copyright (c) 2015 Nicolas Courtois, Guangyan Song, Ryan Castellucci, All Rights Reserved */ +#include "ec_pubkey_fast.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "secp256k1/src/libsecp256k1-config.h" +#include "secp256k1/include/secp256k1.h" + +#include "secp256k1/src/util.h" +#include "secp256k1/src/num_impl.h" +#include "secp256k1/src/field_impl.h" +#include "secp256k1/src/field_10x26_impl.h" +#include 
"secp256k1/src/scalar_impl.h" +#include "secp256k1/src/group_impl.h" +#include "secp256k1/src/ecmult_gen_impl.h" +#include "secp256k1/src/ecmult.h" +#include "secp256k1/src/eckey_impl.h" + +static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size); + +#include "mmapf.h" + +#undef ASSERT + +#define READBIT(A, B) ((A >> (B & 7)) & 1) +#define SETBIT(T, B, V) (T = V ? T | (1<infinity = 1; + int bits; + + for (int j = 0; j < n_windows; j++) { + if (j == n_windows -1 && remmining != 0) { + bits = 0; + for (int i = 0; i < remmining; i++) { + SETBIT(bits,i,a[i + j * WINDOW_SIZE]); + } + } else { + bits = 0; + for (int i = 0; i < WINDOW_SIZE; i++) { + SETBIT(bits,i,a[i + j * WINDOW_SIZE]); + } + } +#if 1 + secp256k1_gej_add_ge_var(r, r, &prec[j*n_values + bits], NULL); +#else + secp256k1_gej_add_ge(r, r, &prec[j*n_values + bits]); +#endif + } +} + +#ifdef USE_BL_ARITHMETIC +static void secp256k1_gej_add_ge_bl(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, secp256k1_fe_t *rzr) { + secp256k1_fe_t z1z1, /*z1,*/ u2, x1, y1, t0, s2, h, hh, i, j, t1, rr, v, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11; + // 7M + 4S + 2 normalize + 22 mul_int/add/negate + if (a->infinity) { + VERIFY_CHECK(rzr == NULL); + secp256k1_gej_set_ge(r, b); + return; + } + if (b->infinity) { + if (rzr) { + secp256k1_fe_set_int(rzr, 1); + } + *r = *a; + return; + } + r->infinity = 0; + + x1 = a->x; secp256k1_fe_normalize_weak(&x1); + y1 = a->y; secp256k1_fe_normalize_weak(&y1); + + secp256k1_fe_sqr(&z1z1, &a->z); // z1z1 = z1^2 + secp256k1_fe_mul(&u2, &b->x, &z1z1); // u2 = x2*z1z1 + secp256k1_fe_mul(&t0, &a->z, &z1z1); // t0 = z1*z1z1 + secp256k1_fe_mul(&s2, &b->y, &t0); // s2 = y2 * t0 + secp256k1_fe_negate(&h, &x1, 1); secp256k1_fe_add(&h, &u2); // h = u2-x1 (3) + secp256k1_fe_sqr(&hh,&h); // hh = h^2 + i = hh; secp256k1_fe_mul_int(&i,4); // i = 4*hh + if (secp256k1_fe_normalizes_to_zero_var(&h)) { + if (secp256k1_fe_normalizes_to_zero_var(&i)) { + secp256k1_gej_double_var(r, a, rzr); + } else { + if (rzr) { + secp256k1_fe_set_int(rzr, 0); + } + r->infinity = 1; + } + return; + } + secp256k1_fe_mul(&j,&h,&i); // j = h*i + secp256k1_fe_negate(&t1, &y1, 1); secp256k1_fe_add(&t1, &s2); // t1 = s2-y1 + rr = t1; secp256k1_fe_mul_int(&rr, 2); // rr = 2 * t1; + secp256k1_fe_mul(&v, &x1, &i); // v = x1 * i + secp256k1_fe_sqr(&t2, &rr); // t2 = rr^2 + t3 = v; secp256k1_fe_mul_int(&t3, 2); // t3 = 2*v + secp256k1_fe_negate(&t4, &j, 1); secp256k1_fe_add(&t4, &t2); // t4 = t2 - j + secp256k1_fe_negate(&r->x, &t3, 2); secp256k1_fe_add(&r->x, &t4); // x3 = t4 - t3; + //secp256k1_fe_normalize_weak(&r->x); + secp256k1_fe_negate(&t5, &r->x, 6); secp256k1_fe_add(&t5, &v); // t5 = v - x3 + secp256k1_fe_mul(&t6,&y1,&j); // t6 = y1 * j + t7 = t6; secp256k1_fe_mul_int(&t7,2); // t7 = 2*t6; + secp256k1_fe_mul(&t8,&rr,&t5); // t8 = rr* t5; + secp256k1_fe_negate(&r->y, &t7, 2); secp256k1_fe_add(&r->y,&t8); // y3 = t8-t7 + //secp256k1_fe_normalize_weak(&r->y); + t9 = h; secp256k1_fe_add(&t9, &a->z); // t9 = z1 + h + secp256k1_fe_sqr(&t10, &t9); // t10 = t9^2 + secp256k1_fe_negate(&t11, &z1z1, 1); secp256k1_fe_add(&t11, &t10); // t11 = t10-z1z1 + secp256k1_fe_negate(&r->z, &hh, 1); secp256k1_fe_add(&r->z, &t11); // z3 = t11 - hh + +} + +static void secp256k1_ecmult_gen_bl(secp256k1_gej_t *r, const unsigned char *seckey){ + unsigned char a[256]; + for (int j = 0; j < 32; j++){ + for (int i = 0; i < 8; i++){ + a[i+j*8] = READBIT(seckey[31-j], i); + } + } + + r->infinity = 1; + int bits; + + for 
(int j = 0; j < n_windows; j++) { + if (j == n_windows -1 && remmining != 0) { + bits = 0; + for (int i = 0; i < remmining; i++) { + SETBIT(bits,i,a[i + j * WINDOW_SIZE]); + } + //bits = secp256k1_scalar_get_bits2(a, j * WINDOW_SIZE, remmining); + } else { + bits = 0; + for (int i = 0; i < WINDOW_SIZE; i++) { + SETBIT(bits,i,a[i + j * WINDOW_SIZE]); + } + //bits = secp256k1_scalar_get_bits2(a, j * WINDOW_SIZE, WINDOW_SIZE); + } + secp256k1_gej_add_ge_bl(r, r, &prec[j*n_values + bits], NULL); + } +} +#endif + +int secp256k1_ec_pubkey_create_precomp(unsigned char *pub_chr, int *pub_chr_sz, const unsigned char *seckey) { + secp256k1_gej_t pj; + secp256k1_ge_t p; + +#ifdef USE_BL_ARITHMETIC + secp256k1_ecmult_gen_bl(&pj, seckey); +#else + secp256k1_ecmult_gen2(&pj, seckey); +#endif + secp256k1_ge_set_gej(&p, &pj); + + *pub_chr_sz = 65; + pub_chr[0] = 4; + + secp256k1_fe_normalize_var(&p.x); + secp256k1_fe_normalize_var(&p.y); + secp256k1_fe_get_b32(pub_chr + 1, &p.x); + secp256k1_fe_get_b32(pub_chr + 33, &p.y); + + return 0; +} + +static secp256k1_gej_t *batchpj; +static secp256k1_ge_t *batchpa; +static secp256k1_fe_t *batchaz; +static secp256k1_fe_t *batchai; + +int secp256k1_ec_pubkey_batch_init(unsigned int num) { + if (!batchpj) { batchpj = malloc(sizeof(secp256k1_gej_t)*num); } + if (!batchpa) { batchpa = malloc(sizeof(secp256k1_ge_t)*num); } + if (!batchaz) { batchaz = malloc(sizeof(secp256k1_fe_t)*num); } + if (!batchai) { batchai = malloc(sizeof(secp256k1_fe_t)*num); } + if (batchpj == NULL || batchpa == NULL || batchaz == NULL || batchai == NULL) { + return 1; + } else { + return 0; + } +} + +void secp256k1_ge_set_all_gej_static(int num, secp256k1_ge_t *batchpa, secp256k1_gej_t *batchpj) { + size_t i; + for (i = 0; i < num; i++) { + batchaz[i] = batchpj[i].z; + } + + secp256k1_fe_inv_all_var(num, batchai, batchaz); + + for (i = 0; i < num; i++) { + secp256k1_ge_set_gej_zinv(&batchpa[i], &batchpj[i], &batchai[i]); + } +} + +// call secp256k1_ec_pubkey_batch_init first or you get segfaults +int secp256k1_ec_pubkey_batch_incr(unsigned int num, unsigned int skip, unsigned char (*pub)[65], unsigned char (*sec)[32], unsigned char start[32]) { + // some of the values could be reused between calls, but dealing with the data + // structures is a pain, and with a reasonable batch size, the perf difference + // is tiny + int i; + + unsigned char b32[32]; + + secp256k1_scalar_t priv, incr_s; + secp256k1_gej_t temp; + secp256k1_ge_t incr_a; + + /* load staring private key */ + secp256k1_scalar_set_b32(&priv, start, NULL); + + /* fill first private */ + secp256k1_scalar_get_b32(sec[0], &priv); + + /* set up increments */ + secp256k1_scalar_set_int(&incr_s, skip); + secp256k1_scalar_get_b32(b32, &incr_s); + +#ifdef USE_BL_ARITHMETIC + secp256k1_ecmult_gen_bl(&temp, b32); + secp256k1_ecmult_gen_bl(&batchpj[0], start); +#else + secp256k1_ecmult_gen2(&temp, b32); + secp256k1_ecmult_gen2(&batchpj[0], start); +#endif + + /* get affine public point for incrementing */ + secp256k1_ge_set_gej_var(&incr_a, &temp); + + for (i = 1; i < num; ++i) { + /* increment and write private key */ + secp256k1_scalar_add(&priv, &priv, &incr_s); + secp256k1_scalar_get_b32(sec[i], &priv); + + /* increment public key */ + secp256k1_gej_add_ge_var(&batchpj[i], &batchpj[i-1], &incr_a, NULL); + } + + /* convert all jacobian coordinates to affine */ + secp256k1_ge_set_all_gej_static(num, batchpa, batchpj); + + /* write out formatted public key */ + for (i = 0; i < num; ++i) { + secp256k1_fe_normalize_var(&batchpa[i].x); + 
secp256k1_fe_normalize_var(&batchpa[i].y); + + pub[i][0] = 0x04; + secp256k1_fe_get_b32(pub[i] + 1, &batchpa[i].x); + secp256k1_fe_get_b32(pub[i] + 33, &batchpa[i].y); + } + + return 0; +} + +// call secp256k1_ec_pubkey_batch_init first or you get segfaults +int secp256k1_ec_pubkey_batch_create(unsigned int num, unsigned char (*pub)[65], unsigned char (*sec)[32]) { + int i; + + /* generate jacobian coordinates */ + for (i = 0; i < num; ++i) { +#ifdef USE_BL_ARITHMETIC + secp256k1_ecmult_gen_bl(&batchpj[i], sec[i]); +#else + secp256k1_ecmult_gen2(&batchpj[i], sec[i]); +#endif + } + + /* convert all jacobian coordinates to affine */ + secp256k1_ge_set_all_gej_static(num, batchpa, batchpj); + + /* write out formatted public key */ + for (i = 0; i < num; ++i) { + secp256k1_fe_normalize_var(&batchpa[i].x); + secp256k1_fe_normalize_var(&batchpa[i].y); + + pub[i][0] = 0x04; + secp256k1_fe_get_b32(pub[i] + 1, &batchpa[i].x); + secp256k1_fe_get_b32(pub[i] + 33, &batchpa[i].y); + } + + return 0; +} + +int secp256k1_scalar_add_b32(void * out, void * a, void *b) { + secp256k1_scalar_t tmp_a, tmp_b; + + secp256k1_scalar_set_b32(&tmp_a, a, NULL); + secp256k1_scalar_set_b32(&tmp_b, b, NULL); + secp256k1_scalar_add(&tmp_a, &tmp_a, &tmp_b); + secp256k1_scalar_get_b32(out, &tmp_a); + + return 0; +} + +inline static void _priv_add(unsigned char *priv, unsigned char add, int p) { + priv[p] += add; + if (priv[p] < add) { + priv[--p] += 1; + while (p) { + if (priv[p] == 0) { + priv[--p] += 1; + } else { + break; + } + } + } +} + +void priv_add_uint8(unsigned char *priv, unsigned char add) { + _priv_add(priv, add, 31); +} + +void priv_add_uint32(unsigned char *priv, unsigned int add) { + int p = 31; + while (add) { + _priv_add(priv, add & 255, p--); + add >>= 8; + } +} + +typedef struct { + secp256k1_gej_t pubj; + secp256k1_ge_t inc; + secp256k1_gej_t incj; + unsigned int n; +} pubkey_incr_t; + +pubkey_incr_t pubkey_incr_ctx; + +int secp256k1_ec_pubkey_incr_init(unsigned char *seckey, unsigned int add) { + unsigned char incr_priv[32]; + memset(incr_priv, 0, sizeof(incr_priv)); + memset(&pubkey_incr_ctx, 0, sizeof(pubkey_incr_ctx)); + priv_add_uint32(incr_priv, add); + + pubkey_incr_ctx.n = add; + +#ifdef USE_BL_ARITHMETIC + secp256k1_ecmult_gen_bl(&pubkey_incr_ctx.pubj, seckey); + secp256k1_ecmult_gen_bl(&pubkey_incr_ctx.incj, incr_priv); +#else + secp256k1_ecmult_gen2(&pubkey_incr_ctx.pubj, seckey); + secp256k1_ecmult_gen2(&pubkey_incr_ctx.incj, incr_priv); +#endif + secp256k1_ge_set_gej(&pubkey_incr_ctx.inc, &pubkey_incr_ctx.incj); + + return 0; +} + +int secp256k1_ec_pubkey_incr(unsigned char *pub_chr, int *pub_chr_sz, unsigned char *seckey) { + secp256k1_ge_t p; + + priv_add_uint32(seckey, pubkey_incr_ctx.n); +#ifdef USE_BL_ARITHMETIC + secp256k1_gej_add_ge_bl(&pubkey_incr_ctx.pubj, &pubkey_incr_ctx.pubj, &pubkey_incr_ctx.inc, NULL); +#else + secp256k1_gej_add_ge_var(&pubkey_incr_ctx.pubj, &pubkey_incr_ctx.pubj, &pubkey_incr_ctx.inc, NULL); +#endif + + secp256k1_ge_set_gej(&p, &pubkey_incr_ctx.pubj); + + *pub_chr_sz = 65; + pub_chr[0] = 4; + + secp256k1_fe_normalize_var(&p.x); + secp256k1_fe_normalize_var(&p.y); + secp256k1_fe_get_b32(pub_chr + 1, &p.x); + secp256k1_fe_get_b32(pub_chr + 33, &p.y); + + return 0; +} + +void * secp256k1_ec_priv_to_gej(unsigned char *priv) { + secp256k1_gej_t *gej = malloc(sizeof(secp256k1_gej_t)); +#ifdef USE_BL_ARITHMETIC + secp256k1_ecmult_gen_bl(gej, priv); +#else + secp256k1_ecmult_gen2(gej, priv); +#endif + + return gej; +} + +int secp256k1_ec_pubkey_add_gej(unsigned char 
*pub_chr, int *pub_chr_sz, void *add) { + secp256k1_ge_t in; + secp256k1_ge_t p; + + secp256k1_gej_t out; + + secp256k1_eckey_pubkey_parse(&in, pub_chr, *pub_chr_sz); + +#ifdef USE_BL_ARITHMETIC + secp256k1_gej_add_ge_bl(&out, (secp256k1_gej_t *)add, &in, NULL); +#else + secp256k1_gej_add_ge_var(&out, (secp256k1_gej_t *)add, &in, NULL); +#endif + + secp256k1_ge_set_gej(&p, &out); + + *pub_chr_sz = 65; + pub_chr[0] = 4; + + secp256k1_fe_normalize_var(&p.x); + secp256k1_fe_normalize_var(&p.y); + secp256k1_fe_get_b32(pub_chr + 1, &p.x); + secp256k1_fe_get_b32(pub_chr + 33, &p.y); + + return 0; +} + +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/ec_pubkey_fast.h b/bf/ec_pubkey_fast.h new file mode 100644 index 0000000..65d3796 --- /dev/null +++ b/bf/ec_pubkey_fast.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2015 Nicolas Courtois, Guangyan Song, Ryan Castellucci, All Rights Reserved */ +#ifndef __EC_PUBKEY_FAST_H_ +#define __EC_PUBKEY_FAST_H_ + +int secp256k1_ec_pubkey_precomp_table_save(int, unsigned char *); +int secp256k1_ec_pubkey_precomp_table(int, unsigned char *); +int secp256k1_ec_pubkey_create_precomp(unsigned char *, int *, const unsigned char *); +int secp256k1_ec_pubkey_incr_init(unsigned char *, unsigned int); +int secp256k1_ec_pubkey_incr(unsigned char *, int *, unsigned char *); + +int secp256k1_scalar_add_b32(void *, void *, void *); + +void priv_add_uint8(unsigned char *, unsigned char); +void priv_add_uint32(unsigned char *, unsigned int); + +void * secp256k1_ec_priv_to_gej(unsigned char *); +int secp256k1_ec_pubkey_add_gej(unsigned char *, int *, void *); + +int secp256k1_ec_pubkey_batch_init(unsigned int); +int secp256k1_ec_pubkey_batch_create(unsigned int, unsigned char (*)[65], unsigned char (*)[32]); +int secp256k1_ec_pubkey_batch_incr(unsigned int, unsigned int, unsigned char (*)[65], unsigned char (*)[32], unsigned char[32]); +#endif//__EC_PUBKEY_FAST_H_ diff --git a/bf/ecmtabgen.c b/bf/ecmtabgen.c new file mode 100644 index 0000000..d12fb8a --- /dev/null +++ b/bf/ecmtabgen.c @@ -0,0 +1,29 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ec_pubkey_fast.h" + +int main(int argc, char **argv) { + int ret; + if (argc != 3) { + fprintf(stderr, "[!] Usage: %s window_size tablefile.tab\n", argv[0]); + exit(1); + } + + if ((ret = secp256k1_ec_pubkey_precomp_table_save(atoi(argv[1]), argv[2])) < 0) + fprintf(stderr, "[!] 
Failed to write tablefile '%s'\n", argv[2]); + + return ret; +} + +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/filehex.c b/bf/filehex.c new file mode 100644 index 0000000..7ca9f92 --- /dev/null +++ b/bf/filehex.c @@ -0,0 +1,49 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include + +#include "hex.h" + +void filehex(FILE *ifile, const unsigned char *filename) { + unsigned char hexed[65]; + unsigned char buf[65536]; + size_t offset; + int r, i, buf_pos, i_max; + + offset = buf_pos = 0; + while ((r = fread(buf + buf_pos, 1, 256, ifile)) > 0) { + i_max = r + buf_pos - 31; + for (i = 0; i < i_max; ++i, ++offset) { + printf("%s:%s,%zu\n", hex(buf+i, 32, hexed, 65), filename, offset); + } + memcpy(buf, buf+i, buf_pos = 31); + } +} + +int main(int argc, char **argv) { + int i; + FILE *ifile; + + /* + if (argc > 1) { + fprintf(stderr, "Usage: %s\n", argv[0]); + return 1; + }*/ + + if (argc == 1) { + filehex(stdin, "STDIN"); + } else { + for (i = 1; i < argc; ++i) { + if ((ifile = fopen(argv[i], "r")) != NULL) { + filehex(ifile, argv[i]); + fclose(ifile); + } + } + } + + return 0; +} + +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/hash160.h b/bf/hash160.h new file mode 100644 index 0000000..7f2ad37 --- /dev/null +++ b/bf/hash160.h @@ -0,0 +1,14 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#ifndef __BRAINFLAYER_HASH160_H_ +#define __BRAINFLAYER_HASH160_H_ + +#include +#include + +typedef union hash160_u { + unsigned char uc[RIPEMD160_DIGEST_LENGTH]; + uint32_t ul[RIPEMD160_DIGEST_LENGTH>>2]; +} hash160_t; + +/* vim: set ts=2 sw=2 et ai si: */ +#endif /* __BRAINFLAYER_HASH160_H_ */ diff --git a/bf/hex.c b/bf/hex.c new file mode 100644 index 0000000..c39c427 --- /dev/null +++ b/bf/hex.c @@ -0,0 +1,20 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include + +#include "hex.h" + +unsigned char * +hex(unsigned char *buf, size_t buf_sz, + unsigned char *hexed, size_t hexed_sz) { + int i, j; + --hexed_sz; + for (i = j = 0; i < buf_sz && j < hexed_sz; ++i, j += 2) { + snprintf(hexed+j, 3, "%02x", buf[i]); + } + hexed[j] = 0; // null terminate + return hexed; +} + +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/hex.h b/bf/hex.h new file mode 100644 index 0000000..f603174 --- /dev/null +++ b/bf/hex.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#ifndef __BRAINFLAYER_HEX_H_ +#define __BRAINFLAYER_HEX_H_ + +static const unsigned char unhex_tab[80] = { + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, + 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static inline unsigned char * +unhex(unsigned char *str, size_t str_sz, + unsigned char *unhexed, size_t unhexed_sz) { + int i, j; + for (i = j = 0; i < str_sz && j < unhexed_sz; i += 2, ++j) { + unhexed[j] = (unhex_tab[str[i+0]&0x4f] & 0xf0)| + (unhex_tab[str[i+1]&0x4f] & 0x0f); + } + return unhexed; +} + +unsigned char * hex(unsigned char *, size_t, unsigned char *, size_t); + +#endif /* __BRAINFLAYER_HEX_H_ */ +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/hex2blf.c b/bf/hex2blf.c new file mode 
100644 index 0000000..4faf3e3 --- /dev/null +++ b/bf/hex2blf.c @@ -0,0 +1,114 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include /* for ntohl/htonl */ + +#include /* pow/exp */ + +#include "hex.h" +#include "bloom.h" +#include "hash160.h" + +const double k_hashes = 20; +const double m_bits = 4294967296; + +int main(int argc, char **argv) { + hash160_t hash; + int i; + double pct; + struct stat sb; + unsigned char *bloom, *hashfile, *bloomfile; + FILE *f, *b; + size_t line_sz = 1024, line_ct = 0; + char *line; + + double err_rate; + + if (argc != 3) { + fprintf(stderr, "[!] Usage: %s hashfile.hex bloomfile.blf\n", argv[0]); + exit(1); + } + + hashfile = argv[1]; + bloomfile = argv[2]; + + if ((f = fopen(hashfile, "r")) == NULL) { + fprintf(stderr, "[!] Failed to open hash160 file '%s'\n", hashfile); + exit(1); + } + + if ((bloom = malloc(BLOOM_SIZE)) == NULL) { + fprintf(stderr, "[!] malloc failed (bloom filter)\n"); + exit(1); + } + + if (stat(bloomfile, &sb) == 0) { + if (!S_ISREG(sb.st_mode) || sb.st_size != BLOOM_SIZE) { + fprintf(stderr, "[!] Bloom filter file '%s' is not the correct size (%ju != %d)\n", bloomfile, sb.st_size, BLOOM_SIZE); + exit(1); + } + if ((b = fopen(bloomfile, "r+")) == NULL) { + fprintf(stderr, "[!] Failed to open bloom filter file '%s' for read/write\n", bloomfile); + exit(1); + } + fprintf(stderr, "[*] Reading existing bloom filter from '%s'...\n", bloomfile); + if ((fread(bloom, BLOOM_SIZE, 1, b)) != 1 || (fseek(b, 0, SEEK_SET)) != 0) { + fprintf(stderr, "[!] Failed to read existing boom filter from '%s'\n", bloomfile); + exit(1); + } + } else { + /* Assume the file didn't exist - yes there is a race condition */ + if ((b = fopen(bloomfile, "w+")) == NULL) { + fprintf(stderr, "[!] Failed to create bloom filter file '%s'\n", bloomfile); + exit(1); + } + // start it empty + fprintf(stderr, "[*] Initializing bloom filter...\n"); + memset(bloom, 0, BLOOM_SIZE); + } + + if ((line = malloc(line_sz+1)) == NULL) { + fprintf(stderr, "[!] malloc failed (line buffer)\n"); + exit(1); + } + + i = 0; + stat(hashfile, &sb); + fprintf(stderr, "[*] Loading hash160s from '%s' \033[s 0.0%%", hashfile); + while (getline(&line, &line_sz, f) > 0) { + ++line_ct; + unhex(line, strlen(line), hash.uc, sizeof(hash.uc)); + bloom_set_hash160(bloom, hash.ul); + + if ((++i & 0x3ffff) == 0) { + pct = 100.0 * ftell(f) / sb.st_size; + fprintf(stderr, "\033[u%5.1f%%", pct); + fflush(stderr); + } + } + fprintf(stderr, "\033[u 100.0%%\n"); + + err_rate = pow(1 - exp(-k_hashes * line_ct / m_bits), k_hashes); + fprintf(stderr, "[*] Loaded %zu hashes, false positive rate: ~%.3e (1 in ~%.3e)\n", line_ct, err_rate, 1/err_rate); + + fprintf(stderr, "[*] Writing bloom filter to '%s'...\n", bloomfile); + if ((fwrite(bloom, BLOOM_SIZE, 1, b)) != 1) { + fprintf(stderr, "[!] 
Failed to write bloom filter file '%s'\n", bloomfile); + exit(1); + } + + fprintf(stderr, "[+] Success!\n"); + return 0; +} + +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/hexln.c b/bf/hexln.c new file mode 100644 index 0000000..9429983 --- /dev/null +++ b/bf/hexln.c @@ -0,0 +1,33 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include + +#include "hex.h" + +int main(int argc, char **argv) { + char *line = NULL; + size_t line_sz = 0, buf_sz = 2; + ssize_t line_read; + unsigned char *buf = malloc(buf_sz); + + if (argc > 1) { + fprintf(stderr, "Usage: %s\n", argv[0]); + return 1; + } + + while ((line_read = getline(&line, &line_sz, stdin)) > 0) { + while (line_sz * 2 > buf_sz) { + buf_sz *= 2; + buf = realloc(buf, buf_sz); + } + if (buf == NULL) { + fprintf(stderr, "memory error\n"); + return 1; + } + printf("%s\n", hex(line, line_read - 1, buf, buf_sz)); + } + + return 0; +} + +/* vim: set ts=2 sw=2 et ai si: */ diff --git a/bf/hsearchf.c b/bf/hsearchf.c new file mode 100644 index 0000000..46d4bf2 --- /dev/null +++ b/bf/hsearchf.c @@ -0,0 +1,97 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#include +#include +#include +#include +#include +#include + +#include /* for ntohl/htonl */ + +#include +#include +#include + +#include "hex.h" +#include "hash160.h" +#include "hsearchf.h" + +#define HSEARCHF_DEBUG 0 + +#define HASHLEN RIPEMD160_DIGEST_LENGTH + +#define RESULT(R) do { \ + res = R; \ + goto hsearchf_result; \ +} while (0) + +#define DO_MEMCMP(H) memcmp(H.uc, hash->uc, HASHLEN) + +// use fadvise to do a readbehind +#define READ_AT(X, H) do { \ + posix_fadvise(fileno(f), ((X*HASHLEN)&0xfffff000)-4096, 8192, POSIX_FADV_WILLNEED); \ + if ((ret = fseek(f, X * HASHLEN, 0)) != 0) { return -1; } \ + if ((ret = fread(H.uc, HASHLEN, 1, f)) != 1) { return -1; } \ + ++i; \ +} while (0) + +// interpolation search +int hsearchf(FILE *f, hash160_t *hash) { + int ret, res = 0, i = 0; + size_t file_sz; + struct stat sb; + hash160_t low_h, mid_h, high_h; + int64_t low_e, mid_e, high_e, entries; + int64_t vlow, vhigh, vtarget; + +#if HSEARCHF_DEBUG > 0 + unsigned char hexed[64]; +#endif + + if ((ret = fstat(fileno(f), &sb)) != 0) { return -1; } + file_sz = sb.st_size; + entries = file_sz / HASHLEN; + low_e = 0; + high_e = entries - 1; + + vtarget = ntohl(hash->ul[0]); + memset(low_h.uc, 0x00, HASHLEN); + memset(high_h.uc, 0xff, HASHLEN); + + // this tries to minimize reads, but does a few extra comparisons + while (low_e != high_e && + memcmp(hash->uc, low_h.uc, HASHLEN) > 0 && + memcmp(hash->uc, high_h.uc, HASHLEN) < 0) { + vlow = ntohl(low_h.ul[0]); vhigh = ntohl(high_h.ul[0]); + mid_e = low_e + (vtarget - vlow) * (high_e - low_e) / (vhigh - vlow); + READ_AT(mid_e, mid_h); + ret = DO_MEMCMP(mid_h); + +#if HSEARCHF_DEBUG > 1 + fprintf(stderr, "target %s checking %9jd %9jd %9jd", + hex(hash->uc, HASHLEN, hexed, sizeof(hexed)), low_e, mid_e, high_e); + fprintf(stderr, " got %s %11d %2d\n", + hex(mid_h.uc, HASHLEN, hexed, sizeof(hexed)), ret, i); +#endif + + if (ret == 0) { + RESULT(1); + } else if (ret < 0) { + low_e = mid_e + 1; + READ_AT(low_e, low_h); + if (DO_MEMCMP(low_h) == 0) { RESULT(1); } + } else { // ret > 0 + high_e = mid_e - 1; + READ_AT(high_e, high_h); + if (DO_MEMCMP(high_h) == 0) { RESULT(1); } + } + } + +hsearchf_result: +#if HSEARCHF_DEBUG > 0 + fprintf(stderr, "target: %s reads: %3d result: %d\n", hex(hash->uc, HASHLEN, hexed, sizeof(hexed)), i, res); +#endif + return res; +} + +/* vim: set ts=2 sw=2 et ai si: */ diff --git 
a/bf/hsearchf.h b/bf/hsearchf.h new file mode 100644 index 0000000..13b166e --- /dev/null +++ b/bf/hsearchf.h @@ -0,0 +1,8 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#ifndef __BRAINFLAYER_HSEARCHF_H_ +#define __BRAINFLAYER_HSEARCHF_H_ + +int hsearchf(FILE *, hash160_t *); + +/* vim: set ts=2 sw=2 et ai si: */ +#endif /* __BRAINFLAYER_HSEARCHF_H_ */ diff --git a/bf/mmapf.c b/bf/mmapf.c new file mode 100644 index 0000000..678b853 --- /dev/null +++ b/bf/mmapf.c @@ -0,0 +1,139 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#define _LARGEFILE_SOURCE +#define _LARGEFILE64_SOURCE + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mmapf.h" + +static char *errstr[] = { + "Unknown error", + "Not a regular file", + "Incorrect file size", + "" +}; + +char * mmapf_strerror(int errnum) { + if (errnum < MMAPF_EXFIRST) { + return strerror(errnum); + } else if (errnum < MMAPF_EXLAST) { + return errstr[errnum-MMAPF_EXFIRST]; + } else { + return errstr[0]; + } +} + +int munmapf(mmapf_ctx *ctx) { + // TODO error checking + if (ctx->fd >= 0) { + msync(ctx->mem, ctx->file_sz, MS_SYNC); + fsync(ctx->fd); + close(ctx->fd); + } + if (ctx->mem != NULL) { + munmap(ctx->mem, ctx->mmap_sz); + } + ctx->file_sz = 0; + ctx->mmap_sz = 0; + ctx->mem = NULL; + ctx->fd = -1; + return 0; +} + +int mmapf(mmapf_ctx *ctx, const unsigned char *filename, size_t size, int flags) { + size_t page_sz = sysconf(_SC_PAGESIZE); + struct stat sb; + + int mmode = 0, mflags = 0, madv = 0; + int fmode = 0, fadv = 0; + int ret, fd; + + // initialize + ctx->mem = NULL; + ctx->fd = -1; + ctx->file_sz = size; + + // round up to the next multiple of the page size + ctx->mmap_sz = size % page_sz ? (size/page_sz+1)*page_sz : size; + + mflags |= flags & MMAPF_COW ? 
MAP_PRIVATE : MAP_SHARED; + + if (flags & MMAPF_RW) { + mmode |= PROT_READ|PROT_WRITE; + fmode |= O_RDWR; + } else if (flags & MMAPF_RD) { + mflags |= MAP_NORESERVE; + mmode |= PROT_READ; + fmode |= O_RDONLY; + } else if (flags & MMAPF_WR) { + mmode |= PROT_WRITE; + fmode |= O_WRONLY; + } + + if (flags & MMAPF_CR) { fmode |= O_CREAT; } + if (flags & MMAPF_EX) { fmode |= O_EXCL; } + if (flags & MMAPF_PRE) { mflags |= MAP_POPULATE; } + if (flags & MMAPF_NOREUSE) { fadv |= POSIX_FADV_NOREUSE; } + if (flags & MMAPF_RND) { fadv |= POSIX_FADV_RANDOM; madv |= POSIX_MADV_RANDOM; } + if (flags & MMAPF_SEQ) { fadv |= POSIX_FADV_SEQUENTIAL; madv |= POSIX_MADV_SEQUENTIAL; } + if (flags & MMAPF_DONTNEED) { fadv |= POSIX_FADV_DONTNEED; madv |= POSIX_MADV_DONTNEED; } + if (flags & MMAPF_WILLNEED) { + fadv |= POSIX_FADV_WILLNEED; + // seems to fail on anonymous maps + if (filename) { madv |= POSIX_MADV_WILLNEED; } + } + + if (!filename) { + ctx->mem = mmap(NULL, ctx->mmap_sz, mmode, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } else { + if (stat(filename, &sb) == 0) { // file exists + if (!S_ISREG(sb.st_mode)) { return MMAPF_ENREG; } // not a regular file + if (sb.st_size != size) { return MMAPF_ESIZE; } // wrong size + if ((fd = open64(filename, fmode)) < 0) { return errno; } // open failed + } else if (flags & MMAPF_CR) { // file missing, but creation requested + if ((fd = open64(filename, fmode)) < 0) { return errno; } // open failed + if ((ret = posix_fallocate(fd, 0, size)) != 0) { + // EBADF is returned on an unsupported filesystem, ignore it + if (ret != EBADF) { return ret; } + } + } else { // file missing, creation *not* requested + return ENOENT; + } + + //if ((ret = posix_fadvise(fd, 0, size, fadv)) != 0) { return ret; } + posix_fadvise(fd, 0, size, fadv); // ignore result + ctx->mem = mmap(NULL, ctx->mmap_sz, mmode, mflags, fd, 0); + } + + if (ctx->mem == MAP_FAILED) { + return errno; + } else if (ctx->mem == NULL) { + return ENOMEM; + } + + if ((ret = posix_madvise(ctx->mem, ctx->mmap_sz, madv)) != 0) { + munmap(ctx->mem, ctx->mmap_sz); + ctx->mem = NULL; + return ret; + } + +#ifdef MADV_HUGEPAGE + // reduce overhead for large mappings + if (size > (1<<26)) { madvise(ctx->mem, ctx->mmap_sz, MADV_HUGEPAGE); } +#endif +#ifdef MADV_DONTDUMP + // don't include in a core dump + madvise(ctx->mem, ctx->mmap_sz, MADV_DONTDUMP); +#endif + + return MMAPF_OKAY; +} diff --git a/bf/mmapf.h b/bf/mmapf.h new file mode 100644 index 0000000..4273f61 --- /dev/null +++ b/bf/mmapf.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2015 Ryan Castellucci, All Rights Reserved */ +#ifndef __MMAPF_H_ +#define __MMAPF_H_ + +typedef struct { + void *mem; + size_t mmap_sz; + size_t file_sz; + int fd; +} mmapf_ctx; + +// file flags +#define MMAPF_RD 0x0001 +#define MMAPF_WR 0x0002 +#define MMAPF_RW (MMAPF_RD|MMAPF_WR) +#define MMAPF_CR 0x0004 +#define MMAPF_EX 0x0008 + +// advise flags +#define MMAPF_RND 0x0100 +#define MMAPF_SEQ 0x0200 +#define MMAPF_PRE 0x0400 +#define MMAPF_COW 0x0800 +#define MMAPF_NOREUSE 0x1000 +#define MMAPF_WILLNEED 0x2000 +#define MMAPF_DONTNEED 0x4000 + +// convenience +#define MMAPF_RNDRD (MMAPF_RD|MMAPF_RND|MMAPF_PRE|MMAPF_WILLNEED) +#define MMAPF_RNDUP (MMAPF_RW|MMAPF_RND|MMAPF_PRE|MMAPF_WILLNEED) +#define MMAPF_SEQCR (MMAPF_WR|MMAPF_SEQ|MMAPF_CR|MMAPF_EX|MMAPF_NOREUSE|MMAPF_DONTNEED) + +// returns +#define MMAPF_OKAY 0 +#define MMAPF_EXFIRST 1000 +#define MMAPF_ENREG 1001 +#define MMAPF_ESIZE 1002 +#define MMAPF_EXLAST 1003 + +char * mmapf_strerror(int); +int mmapf(mmapf_ctx *, const unsigned char *, size_t, 
int); +int munmapf(mmapf_ctx *); + +/* vim: set ts=2 sw=2 et ai si: */ +#endif /* __MMAPF_H_ */ diff --git a/bf/ripemd160_256.c b/bf/ripemd160_256.c new file mode 100644 index 0000000..b7a9126 --- /dev/null +++ b/bf/ripemd160_256.c @@ -0,0 +1,286 @@ +#define _RIPEMD160_C_ 1 + +#include "ripemd160_256.h" + +// adapted by Pieter Wuille in 2012; all changes are in the public domain +// modified by Ryan Castellucci in 2015; all changes are in the public domain + +/* + * + * RIPEMD160.c : RIPEMD-160 implementation + * + * Written in 2008 by Dwayne C. Litzenberger + * + * =================================================================== + * The contents of this file are dedicated to the public domain. To + * the extent that dedication to the public domain is not available, + * everyone is granted a worldwide, perpetual, royalty-free, + * non-exclusive license to exercise all rights associated with the + * contents of this file for any purpose whatsoever. + * No rights are reserved. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * =================================================================== + * + * Country of origin: Canada + * + * This implementation (written in C) is based on an implementation the author + * wrote in Python. + * + * This implementation was written with reference to the RIPEMD-160 + * specification, which is available at: + * http://homes.esat.kuleuven.be/~cosicart/pdf/AB-9601/ + * + * It is also documented in the _Handbook of Applied Cryptography_, as + * Algorithm 9.55. It's on page 30 of the following PDF file: + * http://www.cacr.math.uwaterloo.ca/hac/about/chap9.pdf + * + * The RIPEMD-160 specification doesn't really tell us how to do padding, but + * since RIPEMD-160 is inspired by MD4, you can use the padding algorithm from + * RFC 1320. + * + * According to http://www.users.zetnet.co.uk/hopwood/crypto/scan/md.html: + * "RIPEMD-160 is big-bit-endian, little-byte-endian, and left-justified." + */ + +#include + +#include + +#define RIPEMD160_DIGEST_SIZE 20 +#define BLOCK_SIZE 64 + +/* cyclic left-shift the 32-bit word n left by s bits */ +#define ROL(s, n) (((n) << (s)) | ((n) >> (32-(s)))) + +/* Initial values for the chaining variables. + * This is just 0123456789ABCDEFFEDCBA9876543210F0E1D2C3 in little-endian. */ +static const uint32_t initial_h[5] = { 0x67452301u, 0xEFCDAB89u, 0x98BADCFEu, 0x10325476u, 0xC3D2E1F0u }; + +/* Ordering of message words. 
Based on the permutations rho(i) and pi(i), defined as follows: + * + * rho(i) := { 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8 }[i] 0 <= i <= 15 + * + * pi(i) := 9*i + 5 (mod 16) + * + * Line | Round 1 | Round 2 | Round 3 | Round 4 | Round 5 + * -------+-----------+-----------+-----------+-----------+----------- + * left | id | rho | rho^2 | rho^3 | rho^4 + * right | pi | rho pi | rho^2 pi | rho^3 pi | rho^4 pi + */ + +/* Left line */ +static const uint8_t RL[5][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, /* Round 1: id */ + { 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8 }, /* Round 2: rho */ + { 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12 }, /* Round 3: rho^2 */ + { 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2 }, /* Round 4: rho^3 */ + { 4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13 } /* Round 5: rho^4 */ +}; + +/* Right line */ +static const uint8_t RR[5][16] = { + { 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12 }, /* Round 1: pi */ + { 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2 }, /* Round 2: rho pi */ + { 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13 }, /* Round 3: rho^2 pi */ + { 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14 }, /* Round 4: rho^3 pi */ + { 12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11 } /* Round 5: rho^4 pi */ +}; + +/* + * Shifts - Since we don't actually re-order the message words according to + * the permutations above (we could, but it would be slower), these tables + * come with the permutations pre-applied. + */ + +/* Shifts, left line */ +static const uint8_t SL[5][16] = { + { 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8 }, /* Round 1 */ + { 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12 }, /* Round 2 */ + { 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5 }, /* Round 3 */ + { 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12 }, /* Round 4 */ + { 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6 } /* Round 5 */ +}; + +/* Shifts, right line */ +static const uint8_t SR[5][16] = { + { 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6 }, /* Round 1 */ + { 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11 }, /* Round 2 */ + { 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5 }, /* Round 3 */ + { 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8 }, /* Round 4 */ + { 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11 } /* Round 5 */ +}; + +/* static padding for 256 bit input */ +static const uint8_t pad256[32] = { + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* length 256 bits, little endian uint64_t */ + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* Boolean functions */ + +#define F1(x, y, z) ((x) ^ (y) ^ (z)) +#define F2(x, y, z) (((x) & (y)) | (~(x) & (z))) +#define F3(x, y, z) (((x) | ~(y)) ^ (z)) +#define F4(x, y, z) (((x) & (z)) | ((y) & ~(z))) +#define F5(x, y, z) ((x) ^ ((y) | ~(z))) + +/* Round constants, left line */ +static const uint32_t KL[5] = { + 0x00000000u, /* Round 1: 0 */ + 0x5A827999u, /* Round 2: floor(2**30 * sqrt(2)) */ + 0x6ED9EBA1u, /* Round 3: floor(2**30 * sqrt(3)) */ + 0x8F1BBCDCu, /* Round 4: floor(2**30 * sqrt(5)) */ + 0xA953FD4Eu /* Round 5: floor(2**30 * sqrt(7)) */ +}; + +/* Round constants, right line */ +static const uint32_t KR[5] = { + 0x50A28BE6u, /* Round 1: floor(2**30 * cubert(2)) */ + 0x5C4DD124u, /* Round 2: floor(2**30 * cubert(3)) */ + 0x6D703EF3u, 
/* Round 3: floor(2**30 * cubert(5)) */ + 0x7A6D76E9u, /* Round 4: floor(2**30 * cubert(7)) */ + 0x00000000u /* Round 5: 0 */ +}; + +static inline void byteswap32(uint32_t *v) +{ + union { uint32_t w; uint8_t b[4]; } x, y; + + x.w = *v; + y.b[0] = x.b[3]; + y.b[1] = x.b[2]; + y.b[2] = x.b[1]; + y.b[3] = x.b[0]; + *v = y.w; + + /* Wipe temporary variables */ + x.w = y.w = 0; +} + +static inline void byteswap_digest(uint32_t *p) +{ + unsigned int i; + + for (i = 0; i < 4; i++) { + byteswap32(p++); + byteswap32(p++); + byteswap32(p++); + byteswap32(p++); + } +} + +/* The RIPEMD160 compression function. */ +static inline void ripemd160_rawcompress(void *pbuf, void *ph) +{ + uint8_t w, round; + uint32_t T; + uint32_t AL, BL, CL, DL, EL; /* left line */ + uint32_t AR, BR, CR, DR, ER; /* right line */ + + uint32_t *buf = pbuf; + uint32_t *h = ph; + + /* Byte-swap the buffer if we're on a big-endian machine */ +#ifdef PCT_BIG_ENDIAN + byteswap_digest(buf); +#endif + + /* initialize state */ + memcpy(h, initial_h, RIPEMD160_DIGEST_SIZE); + + /* Load the left and right lines with the initial state */ + AL = AR = h[0]; + BL = BR = h[1]; + CL = CR = h[2]; + DL = DR = h[3]; + EL = ER = h[4]; + + /* Round 1 */ + round = 0; + for (w = 0; w < 16; w++) { /* left line */ + T = ROL(SL[round][w], AL + F1(BL, CL, DL) + buf[RL[round][w]] + KL[round]) + EL; + AL = EL; EL = DL; DL = ROL(10, CL); CL = BL; BL = T; + } + for (w = 0; w < 16; w++) { /* right line */ + T = ROL(SR[round][w], AR + F5(BR, CR, DR) + buf[RR[round][w]] + KR[round]) + ER; + AR = ER; ER = DR; DR = ROL(10, CR); CR = BR; BR = T; + } + + /* Round 2 */ + round++; + for (w = 0; w < 16; w++) { /* left line */ + T = ROL(SL[round][w], AL + F2(BL, CL, DL) + buf[RL[round][w]] + KL[round]) + EL; + AL = EL; EL = DL; DL = ROL(10, CL); CL = BL; BL = T; + } + for (w = 0; w < 16; w++) { /* right line */ + T = ROL(SR[round][w], AR + F4(BR, CR, DR) + buf[RR[round][w]] + KR[round]) + ER; + AR = ER; ER = DR; DR = ROL(10, CR); CR = BR; BR = T; + } + + /* Round 3 */ + round++; + for (w = 0; w < 16; w++) { /* left line */ + T = ROL(SL[round][w], AL + F3(BL, CL, DL) + buf[RL[round][w]] + KL[round]) + EL; + AL = EL; EL = DL; DL = ROL(10, CL); CL = BL; BL = T; + } + for (w = 0; w < 16; w++) { /* right line */ + T = ROL(SR[round][w], AR + F3(BR, CR, DR) + buf[RR[round][w]] + KR[round]) + ER; + AR = ER; ER = DR; DR = ROL(10, CR); CR = BR; BR = T; + } + + /* Round 4 */ + round++; + for (w = 0; w < 16; w++) { /* left line */ + T = ROL(SL[round][w], AL + F4(BL, CL, DL) + buf[RL[round][w]] + KL[round]) + EL; + AL = EL; EL = DL; DL = ROL(10, CL); CL = BL; BL = T; + } + for (w = 0; w < 16; w++) { /* right line */ + T = ROL(SR[round][w], AR + F2(BR, CR, DR) + buf[RR[round][w]] + KR[round]) + ER; + AR = ER; ER = DR; DR = ROL(10, CR); CR = BR; BR = T; + } + + /* Round 5 */ + round++; + for (w = 0; w < 16; w++) { /* left line */ + T = ROL(SL[round][w], AL + F5(BL, CL, DL) + buf[RL[round][w]] + KL[round]) + EL; + AL = EL; EL = DL; DL = ROL(10, CL); CL = BL; BL = T; + } + for (w = 0; w < 16; w++) { /* right line */ + T = ROL(SR[round][w], AR + F1(BR, CR, DR) + buf[RR[round][w]] + KR[round]) + ER; + AR = ER; ER = DR; DR = ROL(10, CR); CR = BR; BR = T; + } + + /* Final mixing stage */ + T = h[1] + CL + DR; + h[1] = h[2] + DL + ER; + h[2] = h[3] + EL + AR; + h[3] = h[4] + AL + BR; + h[4] = h[0] + BL + CR; + h[0] = T; + + /* Byte-swap the output if we're on a big-endian machine */ +#ifdef PCT_BIG_ENDIAN + byteswap_digest(h); +#endif +} + +void ripemd160_256(const void *in, void *out) { 
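+  /* RIPEMD-160 specialized for a fixed 32-byte (256-bit) input: the message */
+  /* always fits in one 64-byte block, so the padding (0x80 byte, zeros and  */
+  /* the little-endian 256-bit length) is precomputed in pad256 and appended */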
+  unsigned char buf[64];
+  /* copy input data */
+  memcpy(buf + 0, in, 32);
+  /* append fixed padding */
+  memcpy(buf + 32, pad256, 32);
+  /* compute and output hash */
+  ripemd160_rawcompress(buf, out);
+}
diff --git a/bf/ripemd160_256.h b/bf/ripemd160_256.h
new file mode 100644
index 0000000..84e51d8
--- /dev/null
+++ b/bf/ripemd160_256.h
@@ -0,0 +1,8 @@
+#ifndef __RIPEMD160_256_H_
+#define __RIPEMD160_256_H_
+
+#include
+
+void ripemd160_256(const void *in, void *out);
+
+#endif//__RIPEMD160_256_H_
diff --git a/bf/scrypt-jane/.gitignore b/bf/scrypt-jane/.gitignore
new file mode 100644
index 0000000..b557eae
--- /dev/null
+++ b/bf/scrypt-jane/.gitignore
@@ -0,0 +1,3 @@
+scrypt_test
+scrypt_speed
+*.o
diff --git a/bf/scrypt-jane/README.md b/bf/scrypt-jane/README.md
new file mode 100644
index 0000000..9585a0c
--- /dev/null
+++ b/bf/scrypt-jane/README.md
@@ -0,0 +1,163 @@
+This project provides performant, flexible implementations of Colin Percival's [scrypt](http://www.tarsnap.com/scrypt.html).
+
+# Features
+
+## Modular Design
+
+The code uses a modular (compile, not runtime) layout to allow new mixing & hash functions to be added easily. The base components (HMAC, PBKDF2, and scrypt) are static and will immediately work with any conforming mix or hash function.
+
+## Supported Mix Functions
+
+* [Salsa20/8](http://cr.yp.to/salsa20.html)
+* [ChaCha20/8](http://cr.yp.to/chacha.html)
+* [Salsa6420/8]()
+
+I am not actually aware of any other candidates for a decent mix function. Salsa20/8 was nearly perfect, but its successor, ChaCha20/8, has better diffusion and is thus stronger, is potentially faster given advanced SIMD support (byte level shuffles, or a 32bit rotate), and is slightly cleaner to implement given that it requires no pre/post processing of data for SIMD implementations.
+
+64-byte blocks are no longer assumed! Salsa6420/8 is a 'proof of concept' 64-bit version of Salsa20/8 with a 128 byte block, and rotation constants chosen to allow 32-bit word shuffles instead of rotations for two of the rotations, which puts it on par with ChaCha in terms of SSE implementation shortcuts.
+
+## Supported Hash Functions
+
+* SHA256/512
+* [BLAKE256/512](https://www.131002.net/blake/)
+* [Skein512](http://www.skein-hash.info/)
+* [Keccak256/512](http://keccak.noekeon.org/) (SHA-3)
+
+Hash function implementations, unlike mix functions, are not optimized. The PBKDF2 computations are relatively minor in the scrypt algorithm, so including CPU specific versions, or vastly unrolling loops, would serve little purpose while bloating the code, both source and binary, and making it more confusing to implement correctly.
+
+Most (now only two!) of the SHA-3 candidates fall into the "annoying to read/implement" category and have not been included yet. This will of course be moot once ~~BLAKE is chosen as SHA-3~~ Keccak is chosen as SHA-3. Well shit.
+
+## CPU Adaptation
+
+The mixing function specialization is selected at runtime based on what the CPU supports (well, x86/x86-64 for now, but theoretically any). On platforms where this is not needed, e.g. where packages are usually compiled from source, it can also select the most suitable implementation at compile time, cutting down on binary size.
+
+For those who are familiar with the scrypt spec, the code specializes at the ROMix level, allowing all copy and xor calls to be inlined efficiently.
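+
+To make the runtime selection concrete, here is a minimal sketch of the pattern, condensed from the real selection logic in `scrypt_getROMix()` (see `code/scrypt-jane-chacha.h` later in this commit). The `romix_fn` typedef and its empty argument list are placeholders for illustration only; the project's actual function-pointer type is `scrypt_ROMixfn` with a full argument list:
+
+    /* sketch only: condensed from scrypt_getROMix(); romix_fn is a placeholder
+       type, the real code uses scrypt_ROMixfn with its full argument list */
+    typedef void (*romix_fn)();
+
+    static romix_fn pick_romix(void) {
+        size_t cpuflags = detect_cpu();        /* cpuid-based feature probe */
+    #if defined(SCRYPT_CHACHA_AVX)
+        if (cpuflags & cpu_avx)
+            return scrypt_ROMix_avx;           /* AVX specialization        */
+    #endif
+    #if defined(SCRYPT_CHACHA_SSE2)
+        if (cpuflags & cpu_sse2)
+            return scrypt_ROMix_sse2;          /* SSE2 specialization       */
+    #endif
+        return scrypt_ROMix_basic;             /* portable fallback         */
+    }
+
+Defining `SCRYPT_CHOOSE_COMPILETIME` instead (see `code/scrypt-conf.h`) skips the probe entirely and bakes a single specialization in at build time, which is what cuts down on binary size.
+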
+***Update***: This is actually less important now, as I switched from specializing at the mix() level and letting the compiler somewhat inefficiently inline block_copy and block_xor, to specializing at ChunkMix(), where they can be inlined properly. I thought about specializing at ROMix(), but it would increase the complexity per mix function even more and would not present many more opportunities than what is generated by the compiler presently.
+
+MSVC uses SSE intrinsics as opposed to inline assembly for the mix functions to allow the compiler to inline them properly. Also, Visual Studio is not smart enough to allow inline assembly in 64-bit code.
+
+## Self Testing
+
+On first use, scrypt() runs a small series of tests to make sure the hash function, mix functions, and scrypt() itself are generating correct results. It will exit() (or call a user defined fatal error function) should any of these tests fail.
+
+Test vectors for individual mix and hash functions are generated from reference implementations. The only "official" test vectors for the full scrypt() are for SHA256 + Salsa20/8 of course; other combinations are generated from this code (once it works with all reference test vectors) and subject to change if any implementation errors are discovered.
+
+# Performance (on an E5200 2.5GHZ)
+
+Benchmarks are run _without_ allocating memory, i.e. enough memory is allocated before the trials are run. Different allocators can have different costs and non-deterministic effects, which is not the point of comparing implementations. The only hash function compared will be SHA-256, to be comparable to Colin's reference implementation, and the hash function will generally be a fraction of a percent of noise in the overall result.
+
+Three different scrypt settings are tested (the last two are from the scrypt paper):
+
+* 'High Volume': N=4096, r=8, p=1, 4mb memory
+* 'Interactive': N=16384, r=8, p=1, 16mb memory
+* 'Non-Interactive': N=1048576, r=8, p=1, 1gb memory
+
+__Note__: Benchmark settings are adjusted based on the underlying block size to keep memory usage consistent with default scrypt. This means Salsa64 has r=4 due to having a 128 byte block size. A 256 byte block size would have r=2, a 512 byte block would have r=1, etc. Additionally, this means Salsa6420/8 is doing half the rounds/byte of default scrypt, but has 64 bit word mixing vs 32 bit, and thus does somewhat less overall mixing. Salsa6420/~10-12 would be needed to maintain equivalent overall mixing.
+
+Cycle counts are in millions of cycles. All versions compiled with gcc 4.6.3, -O3. Sorted from fastest to slowest.
+
+Scaling refers to how much more expensive 'Non-Interactive' is to compute than 'High Volume', normalized to "ideal" scaling (256x difficulty). Under 100% means it becomes easier to process as N grows; over 100% means it becomes more difficult to process as N grows.
+| Implementation | Algo | High Volume | Interactive | Non-Interactive | Scaling |
+|----------------|------|-------------|-------------|-----------------|---------|
+| scrypt-jane SSSE3 64bit | Salsa6420/8 | 18.2m | 75.6m | 5120.0m | 110.0% |
+| scrypt-jane SSSE3 64bit | ChaCha20/8 | 19.6m | 79.6m | 5296.7m | 105.6% |
+| scrypt-jane SSSE3 32bit | ChaCha20/8 | 19.8m | 80.3m | 5346.1m | 105.5% |
+| scrypt-jane SSE2 64bit | Salsa6420/8 | 19.8m | 82.1m | 5529.2m | 109.1% |
+| scrypt-jane SSE2 64bit | Salsa20/8 | 22.1m | 89.7m | 5938.8m | 105.0% |
+| scrypt-jane SSE2 32bit | Salsa20/8 | 22.3m | 90.6m | 6011.0m | 105.3% |
+| scrypt-jane SSE2 64bit | ChaCha20/8 | 23.9m | 96.8m | 6399.7m | 104.6% |
+| scrypt-jane SSE2 32bit | ChaCha20/8 | 24.2m | 98.3m | 6500.7m | 104.9% |
+| *Reference SSE2 64bit* | Salsa20/8 | 32.9m | 135.2m | 8881.6m | 105.5% |
+| *Reference SSE2 32bit* | Salsa20/8 | 33.0m | 134.4m | 8885.2m | 105.2% |
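+
+(As a worked example of the Scaling column: in the first row, 'Non-Interactive' costs 5120.0m / 18.2m ≈ 281x as much as 'High Volume'; measured against the ideal 256x, that is ≈ 110%, the figure shown.)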
+ +* scrypt-jane Salsa6420/8-SSSE3 is ~1.80x faster than reference Salsa20/8-SSE2 for High Volume, but drops to 1.73x faster for 'Non-Interactive' instead of remaining constant +* scrypt-jane ChaCha20/8-SSSE3 is ~1.67x faster than reference Salsa20/8-SSE2 +* scrypt-jane Salsa20/8-SSE2 is ~1.48x faster than reference Salsa20/8-SSE2 + +# Performance (on a slightly noisy E3-1270 3.4GHZ) + +All versions compiled with gcc 4.4.7, -O3. Sorted from fastest to slowest. + + + + + + + + + + + + + + + + + + + + + + +
+| Implementation | Algo | High Volume | Interactive | Non-Interactive | Scaling |
+|----------------|------|-------------|-------------|-----------------|---------|
+| scrypt-jane AVX 64bit | Salsa6420/8 | 11.8m | 52.5m | 3848.6m | 127.4% |
+| scrypt-jane SSSE3 64bit | Salsa6420/8 | 13.3m | 57.9m | 4176.6m | 122.7% |
+| scrypt-jane SSE2 64bit | Salsa6420/8 | 14.2m | 61.1m | 4382.4m | 120.6% |
+| scrypt-jane AVX 64bit | ChaCha20/8 | 18.0m | 77.4m | 5396.8m | 117.1% |
+| scrypt-jane AVX 32bit | ChaCha20/8 | 18.3m | 82.1m | 5421.8m | 115.7% |
+| scrypt-jane SSSE3 64bit | ChaCha20/8 | 19.0m | 81.3m | 5600.7m | 115.1% |
+| scrypt-jane AVX 64bit | Salsa20/8 | 19.0m | 81.2m | 5610.6m | 115.3% |
+| scrypt-jane AVX 32bit | Salsa20/8 | 19.0m | 81.3m | 5621.6m | 115.6% |
+| scrypt-jane SSSE3 32bit | ChaCha20/8 | 19.1m | 81.8m | 5621.6m | 115.0% |
+| scrypt-jane SSE2 64bit | Salsa20/8 | 19.5m | 83.8m | 5772.9m | 115.6% |
+| scrypt-jane SSE2 32bit | Salsa20/8 | 19.6m | 84.0m | 5793.9m | 115.5% |
+| *Reference SSE2/AVX 64bit* | Salsa20/8 | 21.5m | 90.4m | 6147.1m | 111.7% |
+| *Reference SSE2/AVX 32bit* | Salsa20/8 | 22.3m | 94.0m | 6267.7m | 110.0% |
+| scrypt-jane SSE2 64bit | ChaCha20/8 | 23.1m | 97.7m | 6670.0m | 112.8% |
+| scrypt-jane SSE2 32bit | ChaCha20/8 | 23.3m | 98.4m | 6728.7m | 112.8% |
+| *Reference SSE2 64bit* | Salsa20/8 | 30.4m | 125.6m | 8139.4m | 104.6% |
+| *Reference SSE2 32bit* | Salsa20/8 | 30.0m | 124.5m | 8469.3m | 110.3% |
+ +* scrypt-jane Salsa6420/8-AVX is 1.60x - 1.82x faster than reference Salsa20/8-SSE2/AVX +* scrypt-jane ChaCha20/8-AVX is 1.13x - 1.19x faster than reference Salsa20/8-SSE2/AVX +* scrypt-jane Salsa20/8-AVX is 1.09x - 1.13x faster than reference Salsa20/8-SSE2/AVX + + +# Building + + [gcc,icc,clang] scrypt-jane.c -O3 -[m32,m64] -DSCRYPT_MIX -DSCRYPT_HASH -c + +where SCRYPT_MIX is one of + +* SCRYPT_SALSA +* SCRYPT_SALSA64 (no optimized 32-bit implementation) +* SCRYPT_CHACHA + +and SCRYPT_HASH is one of + +* SCRYPT_SHA256 +* SCRYPT_SHA512 +* SCRYPT_BLAKE256 +* SCRYPT_BLAKE512 +* SCRYPT_SKEIN512 +* SCRYPT_KECCAK256 +* SCRYPT_KECCAK512 + +e.g. + + gcc scrypt-jane.c -O3 -DSCRYPT_CHACHA -DSCRYPT_BLAKE512 -c + gcc example.c scrypt-jane.o -o example + +clang *may* need "-no-integrated-as" as some? versions don't support ".intel_syntax" + +# Using + + #include "scrypt-jane.h" + + scrypt(password, password_len, salt, salt_len, Nfactor, pfactor, rfactor, out, want_bytes); + +## scrypt parameters + +* Nfactor: Increases CPU & Memory Hardness +* rfactor: Increases Memory Hardness +* pfactor: Increases CPU Hardness + +In scrypt terms + +* N = (1 << (Nfactor + 1)), which controls how many times to mix each chunk, and how many temporary chunks are used. Increasing N increases both CPU time and memory used. +* r = (1 << rfactor), which controls how many blocks are in a chunk (i.e., 2 * r blocks are in a chunk). Increasing r increases how much memory is used. +* p = (1 << pfactor), which controls how many passes to perform over the set of N chunks. Increasing p increases CPU time used. + +I chose to use the log2 of each parameter as it is the common way to communicate settings (e.g. 2^20, not 1048576). + +# License + +Public Domain, or MIT \ No newline at end of file diff --git a/bf/scrypt-jane/code/scrypt-conf.h b/bf/scrypt-jane/code/scrypt-conf.h new file mode 100644 index 0000000..46685a5 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-conf.h @@ -0,0 +1,28 @@ +/* + pick the best algo at runtime or compile time? + ---------------------------------------------- + SCRYPT_CHOOSE_COMPILETIME (gcc only!) 
+ SCRYPT_CHOOSE_RUNTIME +*/ +#define SCRYPT_CHOOSE_RUNTIME + + +/* + hash function to use + ------------------------------- + SCRYPT_BLAKE256 + SCRYPT_BLAKE512 + SCRYPT_SHA256 + SCRYPT_SHA512 + SCRYPT_SKEIN512 +*/ +//#define SCRYPT_SHA256 + + +/* + block mixer to use + ----------------------------- + SCRYPT_CHACHA + SCRYPT_SALSA +*/ +//#define SCRYPT_SALSA diff --git a/bf/scrypt-jane/code/scrypt-jane-chacha.h b/bf/scrypt-jane/code/scrypt-jane-chacha.h new file mode 100644 index 0000000..5423457 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-chacha.h @@ -0,0 +1,162 @@ +#define SCRYPT_MIX_BASE "ChaCha20/8" + +typedef uint32_t scrypt_mix_word_t; + +#define SCRYPT_WORDTO8_LE U32TO8_LE +#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP + +#define SCRYPT_BLOCK_BYTES 64 +#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) + +/* must have these here in case block bytes is ever != 64 */ +#include "scrypt-jane-romix-basic.h" + +#include "scrypt-jane-mix_chacha-xop.h" +#include "scrypt-jane-mix_chacha-avx.h" +#include "scrypt-jane-mix_chacha-ssse3.h" +#include "scrypt-jane-mix_chacha-sse2.h" +#include "scrypt-jane-mix_chacha.h" + +#if defined(SCRYPT_CHACHA_XOP) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop + #define SCRYPT_ROMIX_FN scrypt_ROMix_xop + #define SCRYPT_MIX_FN chacha_core_xop + #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop + #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_CHACHA_AVX) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx + #define SCRYPT_ROMIX_FN scrypt_ROMix_avx + #define SCRYPT_MIX_FN chacha_core_avx + #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop + #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_CHACHA_SSSE3) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3 + #define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3 + #define SCRYPT_MIX_FN chacha_core_ssse3 + #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop + #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2 + #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2 + #define SCRYPT_MIX_FN chacha_core_sse2 + #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop + #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop + #include "scrypt-jane-romix-template.h" +#endif + +/* cpu agnostic */ +#define SCRYPT_ROMIX_FN scrypt_ROMix_basic +#define SCRYPT_MIX_FN chacha_core_basic +#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian +#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian +#include "scrypt-jane-romix-template.h" + +#if !defined(SCRYPT_CHOOSE_COMPILETIME) +static scrypt_ROMixfn +scrypt_getROMix(void) { + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_CHACHA_XOP) + if (cpuflags & cpu_xop) + return scrypt_ROMix_xop; + else +#endif + +#if defined(SCRYPT_CHACHA_AVX) + if (cpuflags & cpu_avx) + return scrypt_ROMix_avx; + else +#endif + +#if defined(SCRYPT_CHACHA_SSSE3) + if (cpuflags & cpu_ssse3) + return scrypt_ROMix_ssse3; + else +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + if (cpuflags & cpu_sse2) + return scrypt_ROMix_sse2; + else +#endif + + return scrypt_ROMix_basic; +} +#endif + + +#if defined(SCRYPT_TEST_SPEED) +static size_t +available_implementations(void) { + size_t cpuflags = detect_cpu(); + size_t flags = 0; + +#if defined(SCRYPT_CHACHA_XOP) + if (cpuflags & cpu_xop) + flags |= cpu_xop; +#endif + +#if defined(SCRYPT_CHACHA_AVX) 
+ if (cpuflags & cpu_avx) + flags |= cpu_avx; +#endif + +#if defined(SCRYPT_CHACHA_SSSE3) + if (cpuflags & cpu_ssse3) + flags |= cpu_ssse3; +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + if (cpuflags & cpu_sse2) + flags |= cpu_sse2; +#endif + + return flags; +} +#endif + +static int +scrypt_test_mix(void) { + static const uint8_t expected[16] = { + 0x48,0x2b,0x2d,0xb8,0xa1,0x33,0x22,0x73,0xcd,0x16,0xc4,0xb4,0xb0,0x7f,0xb1,0x8a, + }; + + int ret = 1; + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_CHACHA_XOP) + if (cpuflags & cpu_xop) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, scrypt_romix_nop, scrypt_romix_nop, expected); +#endif + +#if defined(SCRYPT_CHACHA_AVX) + if (cpuflags & cpu_avx) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, scrypt_romix_nop, scrypt_romix_nop, expected); +#endif + +#if defined(SCRYPT_CHACHA_SSSE3) + if (cpuflags & cpu_ssse3) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, scrypt_romix_nop, scrypt_romix_nop, expected); +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + if (cpuflags & cpu_sse2) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, scrypt_romix_nop, scrypt_romix_nop, expected); +#endif + +#if defined(SCRYPT_CHACHA_BASIC) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected); +#endif + + return ret; +} + diff --git a/bf/scrypt-jane/code/scrypt-jane-hash.h b/bf/scrypt-jane/code/scrypt-jane-hash.h new file mode 100644 index 0000000..e727814 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-hash.h @@ -0,0 +1,48 @@ +#if defined(SCRYPT_BLAKE512) +#include "scrypt-jane-hash_blake512.h" +#elif defined(SCRYPT_BLAKE256) +#include "scrypt-jane-hash_blake256.h" +#elif defined(SCRYPT_SHA512) +#include "scrypt-jane-hash_sha512.h" +#elif defined(SCRYPT_SHA256) +#include "scrypt-jane-hash_sha256.h" +#elif defined(SCRYPT_SKEIN512) +#include "scrypt-jane-hash_skein512.h" +#elif defined(SCRYPT_KECCAK512) || defined(SCRYPT_KECCAK256) +#include "scrypt-jane-hash_keccak.h" +#else + #define SCRYPT_HASH "ERROR" + #define SCRYPT_HASH_BLOCK_SIZE 64 + #define SCRYPT_HASH_DIGEST_SIZE 64 + typedef struct scrypt_hash_state_t { size_t dummy; } scrypt_hash_state; + typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + static void scrypt_hash_init(scrypt_hash_state *S) {} + static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {} + static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {} + static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {0}; + #error must define a hash function! 
+#endif + +#include "scrypt-jane-pbkdf2.h" + +#define SCRYPT_TEST_HASH_LEN 257 /* (2 * largest block size) + 1 */ + +static int +scrypt_test_hash(void) { + scrypt_hash_state st; + scrypt_hash_digest hash, final; + uint8_t msg[SCRYPT_TEST_HASH_LEN]; + size_t i; + + for (i = 0; i < SCRYPT_TEST_HASH_LEN; i++) + msg[i] = (uint8_t)i; + + scrypt_hash_init(&st); + for (i = 0; i < SCRYPT_TEST_HASH_LEN + 1; i++) { + scrypt_hash(hash, msg, i); + scrypt_hash_update(&st, hash, sizeof(hash)); + } + scrypt_hash_finish(&st, final); + return scrypt_verify(final, scrypt_test_hash_expected, SCRYPT_HASH_DIGEST_SIZE); +} + diff --git a/bf/scrypt-jane/code/scrypt-jane-hash_blake256.h b/bf/scrypt-jane/code/scrypt-jane-hash_blake256.h new file mode 100644 index 0000000..4690b11 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-hash_blake256.h @@ -0,0 +1,177 @@ +#define SCRYPT_HASH "BLAKE-256" +#define SCRYPT_HASH_BLOCK_SIZE 64 +#define SCRYPT_HASH_DIGEST_SIZE 32 + +typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + +const uint8_t blake256_sigma[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, + 14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3, + 11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4, + 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8, + 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13, + 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9, + 12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11, + 13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10, + 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5, + 10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13 ,0, +}; + +const uint32_t blake256_constants[16] = { + 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, + 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917 +}; + +typedef struct scrypt_hash_state_t { + uint32_t H[8], T[2]; + uint32_t leftover; + uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; +} scrypt_hash_state; + +static void +blake256_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) { + const uint8_t *sigma, *sigma_end = blake256_sigma + (10 * 16); + uint32_t m[16], v[16], h[8], t[2]; + uint32_t i; + + for (i = 0; i < 8; i++) h[i] = S->H[i]; + for (i = 0; i < 2; i++) t[i] = S->T[i]; + + while (blocks--) { + t[0] += 512; + t[1] += (t[0] < 512) ? 
1 : 0; + + for (i = 0; i < 8; i++) v[i ] = h[i]; + for (i = 0; i < 4; i++) v[i + 8] = blake256_constants[i]; + for (i = 0; i < 2; i++) v[i + 12] = blake256_constants[i+4] ^ t[0]; + for (i = 0; i < 2; i++) v[i + 14] = blake256_constants[i+6] ^ t[1]; + + for (i = 0; i < 16; i++) m[i] = U8TO32_BE(&in[i * 4]); + in += 64; + + #define G(a,b,c,d,e) \ + v[a] += (m[sigma[e+0]] ^ blake256_constants[sigma[e+1]]) + v[b]; \ + v[d] = ROTR32(v[d] ^ v[a],16); \ + v[c] += v[d]; \ + v[b] = ROTR32(v[b] ^ v[c],12); \ + v[a] += (m[sigma[e+1]] ^ blake256_constants[sigma[e+0]]) + v[b]; \ + v[d] = ROTR32(v[d] ^ v[a], 8); \ + v[c] += v[d]; \ + v[b] = ROTR32(v[b] ^ v[c], 7); + + for (i = 0, sigma = blake256_sigma; i < 14; i++) { + G(0, 4, 8,12, 0); + G(1, 5, 9,13, 2); + G(2, 6,10,14, 4); + G(3, 7,11,15, 6); + + G(0, 5,10,15, 8); + G(1, 6,11,12,10); + G(2, 7, 8,13,12); + G(3, 4, 9,14,14); + + sigma += 16; + if (sigma == sigma_end) + sigma = blake256_sigma; + } + + #undef G + + for (i = 0; i < 8; i++) h[i] ^= (v[i] ^ v[i + 8]); + } + + for (i = 0; i < 8; i++) S->H[i] = h[i]; + for (i = 0; i < 2; i++) S->T[i] = t[i]; +} + +static void +scrypt_hash_init(scrypt_hash_state *S) { + S->H[0] = 0x6a09e667ULL; + S->H[1] = 0xbb67ae85ULL; + S->H[2] = 0x3c6ef372ULL; + S->H[3] = 0xa54ff53aULL; + S->H[4] = 0x510e527fULL; + S->H[5] = 0x9b05688cULL; + S->H[6] = 0x1f83d9abULL; + S->H[7] = 0x5be0cd19ULL; + S->T[0] = 0; + S->T[1] = 0; + S->leftover = 0; +} + +static void +scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { + size_t blocks, want; + + /* handle the previous data */ + if (S->leftover) { + want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); + want = (want < inlen) ? want : inlen; + memcpy(S->buffer + S->leftover, in, want); + S->leftover += (uint32_t)want; + if (S->leftover < SCRYPT_HASH_BLOCK_SIZE) + return; + in += want; + inlen -= want; + blake256_blocks(S, S->buffer, 1); + } + + /* handle the current data */ + blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1)); + S->leftover = (uint32_t)(inlen - blocks); + if (blocks) { + blake256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE); + in += blocks; + } + + /* handle leftover data */ + if (S->leftover) + memcpy(S->buffer, in, S->leftover); +} + +static void +scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { + uint32_t th, tl, bits; + + bits = (S->leftover << 3); + tl = S->T[0] + bits; + th = S->T[1]; + if (S->leftover == 0) { + S->T[0] = (uint32_t)0 - (uint32_t)512; + S->T[1] = (uint32_t)0 - (uint32_t)1; + } else if (S->T[0] == 0) { + S->T[0] = ((uint32_t)0 - (uint32_t)512) + bits; + S->T[1] = S->T[1] - 1; + } else { + S->T[0] -= (512 - bits); + } + + S->buffer[S->leftover] = 0x80; + if (S->leftover <= 55) { + memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover); + } else { + memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover); + blake256_blocks(S, S->buffer, 1); + S->T[0] = (uint32_t)0 - (uint32_t)512; + S->T[1] = (uint32_t)0 - (uint32_t)1; + memset(S->buffer, 0, 56); + } + S->buffer[55] |= 1; + U32TO8_BE(S->buffer + 56, th); + U32TO8_BE(S->buffer + 60, tl); + blake256_blocks(S, S->buffer, 1); + + U32TO8_BE(&hash[ 0], S->H[0]); + U32TO8_BE(&hash[ 4], S->H[1]); + U32TO8_BE(&hash[ 8], S->H[2]); + U32TO8_BE(&hash[12], S->H[3]); + U32TO8_BE(&hash[16], S->H[4]); + U32TO8_BE(&hash[20], S->H[5]); + U32TO8_BE(&hash[24], S->H[6]); + U32TO8_BE(&hash[28], S->H[7]); +} + +static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 0xcc,0xa9,0x1e,0xa9,0x20,0x97,0x37,0x40,0x17,0xc0,0xa0,0x52,0x87,0xfc,0x08,0x20, + 
0x40,0xf5,0x81,0x86,0x62,0x75,0x78,0xb2,0x79,0xce,0xde,0x27,0x3c,0x7f,0x85,0xd8, +}; diff --git a/bf/scrypt-jane/code/scrypt-jane-hash_blake512.h b/bf/scrypt-jane/code/scrypt-jane-hash_blake512.h new file mode 100644 index 0000000..ea2a583 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-hash_blake512.h @@ -0,0 +1,181 @@ +#define SCRYPT_HASH "BLAKE-512" +#define SCRYPT_HASH_BLOCK_SIZE 128 +#define SCRYPT_HASH_DIGEST_SIZE 64 + +typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + +const uint8_t blake512_sigma[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, + 14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3, + 11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4, + 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8, + 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13, + 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9, + 12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11, + 13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10, + 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5, + 10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13 ,0, +}; + +const uint64_t blake512_constants[16] = { + 0x243f6a8885a308d3ULL, 0x13198a2e03707344ULL, 0xa4093822299f31d0ULL, 0x082efa98ec4e6c89ULL, + 0x452821e638d01377ULL, 0xbe5466cf34e90c6cULL, 0xc0ac29b7c97c50ddULL, 0x3f84d5b5b5470917ULL, + 0x9216d5d98979fb1bULL, 0xd1310ba698dfb5acULL, 0x2ffd72dbd01adfb7ULL, 0xb8e1afed6a267e96ULL, + 0xba7c9045f12c7f99ULL, 0x24a19947b3916cf7ULL, 0x0801f2e2858efc16ULL, 0x636920d871574e69ULL +}; + +typedef struct scrypt_hash_state_t { + uint64_t H[8], T[2]; + uint32_t leftover; + uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; +} scrypt_hash_state; + +static void +blake512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) { + const uint8_t *sigma, *sigma_end = blake512_sigma + (10 * 16); + uint64_t m[16], v[16], h[8], t[2]; + uint32_t i; + + for (i = 0; i < 8; i++) h[i] = S->H[i]; + for (i = 0; i < 2; i++) t[i] = S->T[i]; + + while (blocks--) { + t[0] += 1024; + t[1] += (t[0] < 1024) ? 
1 : 0; + + for (i = 0; i < 8; i++) v[i ] = h[i]; + for (i = 0; i < 4; i++) v[i + 8] = blake512_constants[i]; + for (i = 0; i < 2; i++) v[i + 12] = blake512_constants[i+4] ^ t[0]; + for (i = 0; i < 2; i++) v[i + 14] = blake512_constants[i+6] ^ t[1]; + + for (i = 0; i < 16; i++) m[i] = U8TO64_BE(&in[i * 8]); + in += 128; + + #define G(a,b,c,d,e) \ + v[a] += (m[sigma[e+0]] ^ blake512_constants[sigma[e+1]]) + v[b]; \ + v[d] = ROTR64(v[d] ^ v[a],32); \ + v[c] += v[d]; \ + v[b] = ROTR64(v[b] ^ v[c],25); \ + v[a] += (m[sigma[e+1]] ^ blake512_constants[sigma[e+0]]) + v[b]; \ + v[d] = ROTR64(v[d] ^ v[a],16); \ + v[c] += v[d]; \ + v[b] = ROTR64(v[b] ^ v[c],11); + + for (i = 0, sigma = blake512_sigma; i < 16; i++) { + G(0, 4, 8,12, 0); + G(1, 5, 9,13, 2); + G(2, 6,10,14, 4); + G(3, 7,11,15, 6); + G(0, 5,10,15, 8); + G(1, 6,11,12,10); + G(2, 7, 8,13,12); + G(3, 4, 9,14,14); + + sigma += 16; + if (sigma == sigma_end) + sigma = blake512_sigma; + } + + #undef G + + for (i = 0; i < 8; i++) h[i] ^= (v[i] ^ v[i + 8]); + } + + for (i = 0; i < 8; i++) S->H[i] = h[i]; + for (i = 0; i < 2; i++) S->T[i] = t[i]; +} + +static void +scrypt_hash_init(scrypt_hash_state *S) { + S->H[0] = 0x6a09e667f3bcc908ULL; + S->H[1] = 0xbb67ae8584caa73bULL; + S->H[2] = 0x3c6ef372fe94f82bULL; + S->H[3] = 0xa54ff53a5f1d36f1ULL; + S->H[4] = 0x510e527fade682d1ULL; + S->H[5] = 0x9b05688c2b3e6c1fULL; + S->H[6] = 0x1f83d9abfb41bd6bULL; + S->H[7] = 0x5be0cd19137e2179ULL; + S->T[0] = 0; + S->T[1] = 0; + S->leftover = 0; +} + +static void +scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { + size_t blocks, want; + + /* handle the previous data */ + if (S->leftover) { + want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); + want = (want < inlen) ? want : inlen; + memcpy(S->buffer + S->leftover, in, want); + S->leftover += (uint32_t)want; + if (S->leftover < SCRYPT_HASH_BLOCK_SIZE) + return; + in += want; + inlen -= want; + blake512_blocks(S, S->buffer, 1); + } + + /* handle the current data */ + blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1)); + S->leftover = (uint32_t)(inlen - blocks); + if (blocks) { + blake512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE); + in += blocks; + } + + /* handle leftover data */ + if (S->leftover) + memcpy(S->buffer, in, S->leftover); +} + +static void +scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { + uint64_t th, tl; + size_t bits; + + bits = (S->leftover << 3); + tl = S->T[0] + bits; + th = S->T[1]; + if (S->leftover == 0) { + S->T[0] = (uint64_t)0 - (uint64_t)1024; + S->T[1] = (uint64_t)0 - (uint64_t)1; + } else if (S->T[0] == 0) { + S->T[0] = ((uint64_t)0 - (uint64_t)1024) + bits; + S->T[1] = S->T[1] - 1; + } else { + S->T[0] -= (1024 - bits); + } + + S->buffer[S->leftover] = 0x80; + if (S->leftover <= 111) { + memset(S->buffer + S->leftover + 1, 0, 111 - S->leftover); + } else { + memset(S->buffer + S->leftover + 1, 0, 127 - S->leftover); + blake512_blocks(S, S->buffer, 1); + S->T[0] = (uint64_t)0 - (uint64_t)1024; + S->T[1] = (uint64_t)0 - (uint64_t)1; + memset(S->buffer, 0, 112); + } + S->buffer[111] |= 1; + U64TO8_BE(S->buffer + 112, th); + U64TO8_BE(S->buffer + 120, tl); + blake512_blocks(S, S->buffer, 1); + + U64TO8_BE(&hash[ 0], S->H[0]); + U64TO8_BE(&hash[ 8], S->H[1]); + U64TO8_BE(&hash[16], S->H[2]); + U64TO8_BE(&hash[24], S->H[3]); + U64TO8_BE(&hash[32], S->H[4]); + U64TO8_BE(&hash[40], S->H[5]); + U64TO8_BE(&hash[48], S->H[6]); + U64TO8_BE(&hash[56], S->H[7]); +} + +static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 
0x2f,0x9d,0x5b,0xbe,0x24,0x0d,0x63,0xd3,0xa0,0xac,0x4f,0xd3,0x01,0xc0,0x23,0x6f, + 0x6d,0xdf,0x6e,0xfb,0x60,0x6f,0xa0,0x74,0xdf,0x9f,0x25,0x65,0xb6,0x11,0x0a,0x83, + 0x23,0x96,0xba,0x91,0x68,0x4b,0x85,0x15,0x13,0x54,0xba,0x19,0xf3,0x2c,0x5a,0x4a, + 0x1f,0x78,0x31,0x02,0xc9,0x1e,0x56,0xc4,0x54,0xca,0xf9,0x8f,0x2c,0x7f,0x85,0xac +}; diff --git a/bf/scrypt-jane/code/scrypt-jane-hash_keccak.h b/bf/scrypt-jane/code/scrypt-jane-hash_keccak.h new file mode 100644 index 0000000..7ed5574 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-hash_keccak.h @@ -0,0 +1,168 @@ +#if defined(SCRYPT_KECCAK256) + #define SCRYPT_HASH "Keccak-256" + #define SCRYPT_HASH_DIGEST_SIZE 32 +#else + #define SCRYPT_HASH "Keccak-512" + #define SCRYPT_HASH_DIGEST_SIZE 64 +#endif +#define SCRYPT_KECCAK_F 1600 +#define SCRYPT_KECCAK_C (SCRYPT_HASH_DIGEST_SIZE * 8 * 2) /* 256=512, 512=1024 */ +#define SCRYPT_KECCAK_R (SCRYPT_KECCAK_F - SCRYPT_KECCAK_C) /* 256=1088, 512=576 */ +#define SCRYPT_HASH_BLOCK_SIZE (SCRYPT_KECCAK_R / 8) + +typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + +typedef struct scrypt_hash_state_t { + uint64_t state[SCRYPT_KECCAK_F / 64]; + uint32_t leftover; + uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; +} scrypt_hash_state; + +static const uint64_t keccak_round_constants[24] = { + 0x0000000000000001ull, 0x0000000000008082ull, + 0x800000000000808aull, 0x8000000080008000ull, + 0x000000000000808bull, 0x0000000080000001ull, + 0x8000000080008081ull, 0x8000000000008009ull, + 0x000000000000008aull, 0x0000000000000088ull, + 0x0000000080008009ull, 0x000000008000000aull, + 0x000000008000808bull, 0x800000000000008bull, + 0x8000000000008089ull, 0x8000000000008003ull, + 0x8000000000008002ull, 0x8000000000000080ull, + 0x000000000000800aull, 0x800000008000000aull, + 0x8000000080008081ull, 0x8000000000008080ull, + 0x0000000080000001ull, 0x8000000080008008ull +}; + +static void +keccak_block(scrypt_hash_state *S, const uint8_t *in) { + size_t i; + uint64_t *s = S->state, t[5], u[5], v, w; + + /* absorb input */ + for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE / 8; i++, in += 8) + s[i] ^= U8TO64_LE(in); + + for (i = 0; i < 24; i++) { + /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */ + t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ + u[0] = t[4] ^ ROTL64(t[1], 1); + u[1] = t[0] ^ ROTL64(t[2], 1); + u[2] = t[1] ^ ROTL64(t[3], 1); + u[3] = t[2] ^ ROTL64(t[4], 1); + u[4] = t[3] ^ ROTL64(t[0], 1); + + /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ + s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; + s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; + s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; + s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; + s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; + + /* rho pi: b[..] = rotl(a[..], ..) 
*/ + v = s[ 1]; + s[ 1] = ROTL64(s[ 6], 44); + s[ 6] = ROTL64(s[ 9], 20); + s[ 9] = ROTL64(s[22], 61); + s[22] = ROTL64(s[14], 39); + s[14] = ROTL64(s[20], 18); + s[20] = ROTL64(s[ 2], 62); + s[ 2] = ROTL64(s[12], 43); + s[12] = ROTL64(s[13], 25); + s[13] = ROTL64(s[19], 8); + s[19] = ROTL64(s[23], 56); + s[23] = ROTL64(s[15], 41); + s[15] = ROTL64(s[ 4], 27); + s[ 4] = ROTL64(s[24], 14); + s[24] = ROTL64(s[21], 2); + s[21] = ROTL64(s[ 8], 55); + s[ 8] = ROTL64(s[16], 45); + s[16] = ROTL64(s[ 5], 36); + s[ 5] = ROTL64(s[ 3], 28); + s[ 3] = ROTL64(s[18], 21); + s[18] = ROTL64(s[17], 15); + s[17] = ROTL64(s[11], 10); + s[11] = ROTL64(s[ 7], 6); + s[ 7] = ROTL64(s[10], 3); + s[10] = ROTL64( v, 1); + + /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ + v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w; + v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w; + v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w; + v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w; + v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w; + + /* iota: a[0,0] ^= round constant */ + s[0] ^= keccak_round_constants[i]; + } +} + +static void +scrypt_hash_init(scrypt_hash_state *S) { + memset(S, 0, sizeof(*S)); +} + +static void +scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { + size_t want; + + /* handle the previous data */ + if (S->leftover) { + want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); + want = (want < inlen) ? 
want : inlen; + memcpy(S->buffer + S->leftover, in, want); + S->leftover += (uint32_t)want; + if (S->leftover < SCRYPT_HASH_BLOCK_SIZE) + return; + in += want; + inlen -= want; + keccak_block(S, S->buffer); + } + + /* handle the current data */ + while (inlen >= SCRYPT_HASH_BLOCK_SIZE) { + keccak_block(S, in); + in += SCRYPT_HASH_BLOCK_SIZE; + inlen -= SCRYPT_HASH_BLOCK_SIZE; + } + + /* handle leftover data */ + S->leftover = (uint32_t)inlen; + if (S->leftover) + memcpy(S->buffer, in, S->leftover); +} + +static void +scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { + size_t i; + + S->buffer[S->leftover] = 0x01; + memset(S->buffer + (S->leftover + 1), 0, SCRYPT_HASH_BLOCK_SIZE - (S->leftover + 1)); + S->buffer[SCRYPT_HASH_BLOCK_SIZE - 1] |= 0x80; + keccak_block(S, S->buffer); + + for (i = 0; i < SCRYPT_HASH_DIGEST_SIZE; i += 8) { + U64TO8_LE(&hash[i], S->state[i / 8]); + } +} + +#if defined(SCRYPT_KECCAK256) +static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 0x26,0xb7,0x10,0xb3,0x66,0xb1,0xd1,0xb1,0x25,0xfc,0x3e,0xe3,0x1e,0x33,0x1d,0x19, + 0x94,0xaa,0x63,0x7a,0xd5,0x77,0x29,0xb4,0x27,0xe9,0xe0,0xf4,0x19,0xba,0x68,0xea, +}; +#else +static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 0x17,0xc7,0x8c,0xa0,0xd9,0x08,0x1d,0xba,0x8a,0xc8,0x3e,0x07,0x90,0xda,0x91,0x88, + 0x25,0xbd,0xd3,0xf8,0x78,0x4a,0x8d,0x5e,0xe4,0x96,0x9c,0x01,0xf3,0xeb,0xdc,0x12, + 0xea,0x35,0x57,0xba,0x94,0xb8,0xe9,0xb9,0x27,0x45,0x0a,0x48,0x5c,0x3d,0x69,0xf0, + 0xdb,0x22,0x38,0xb5,0x52,0x22,0x29,0xea,0x7a,0xb2,0xe6,0x07,0xaa,0x37,0x4d,0xe6, +}; +#endif + diff --git a/bf/scrypt-jane/code/scrypt-jane-hash_sha256.h b/bf/scrypt-jane/code/scrypt-jane-hash_sha256.h new file mode 100644 index 0000000..d06d3e1 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-hash_sha256.h @@ -0,0 +1,135 @@ +#define SCRYPT_HASH "SHA-2-256" +#define SCRYPT_HASH_BLOCK_SIZE 64 +#define SCRYPT_HASH_DIGEST_SIZE 32 + +typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + +typedef struct scrypt_hash_state_t { + uint32_t H[8]; + uint64_t T; + uint32_t leftover; + uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; +} scrypt_hash_state; + +static const uint32_t sha256_constants[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define Ch(x,y,z) (z ^ (x & (y ^ z))) +#define Maj(x,y,z) (((x | y) & z) | (x & y)) +#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22)) +#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25)) +#define G0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ (x >> 3)) +#define G1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10)) +#define W0(in,i) (U8TO32_BE(&in[i * 4])) +#define W1(i) (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16]) +#define STEP(i) \ + t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \ + t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + 
sha256_constants[i] + w[i]; \ + r[7] = r[6]; \ + r[6] = r[5]; \ + r[5] = r[4]; \ + r[4] = r[3] + t0; \ + r[3] = r[2]; \ + r[2] = r[1]; \ + r[1] = r[0]; \ + r[0] = t0 + t1; + +static void +sha256_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) { + uint32_t r[8], w[64], t0, t1; + size_t i; + + for (i = 0; i < 8; i++) r[i] = S->H[i]; + + while (blocks--) { + for (i = 0; i < 16; i++) { w[i] = W0(in, i); } + for (i = 16; i < 64; i++) { w[i] = W1(i); } + for (i = 0; i < 64; i++) { STEP(i); } + for (i = 0; i < 8; i++) { r[i] += S->H[i]; S->H[i] = r[i]; } + S->T += SCRYPT_HASH_BLOCK_SIZE * 8; + in += SCRYPT_HASH_BLOCK_SIZE; + } +} + +static void +scrypt_hash_init(scrypt_hash_state *S) { + S->H[0] = 0x6a09e667; + S->H[1] = 0xbb67ae85; + S->H[2] = 0x3c6ef372; + S->H[3] = 0xa54ff53a; + S->H[4] = 0x510e527f; + S->H[5] = 0x9b05688c; + S->H[6] = 0x1f83d9ab; + S->H[7] = 0x5be0cd19; + S->T = 0; + S->leftover = 0; +} + +static void +scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { + size_t blocks, want; + + /* handle the previous data */ + if (S->leftover) { + want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); + want = (want < inlen) ? want : inlen; + memcpy(S->buffer + S->leftover, in, want); + S->leftover += (uint32_t)want; + if (S->leftover < SCRYPT_HASH_BLOCK_SIZE) + return; + in += want; + inlen -= want; + sha256_blocks(S, S->buffer, 1); + } + + /* handle the current data */ + blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1)); + S->leftover = (uint32_t)(inlen - blocks); + if (blocks) { + sha256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE); + in += blocks; + } + + /* handle leftover data */ + if (S->leftover) + memcpy(S->buffer, in, S->leftover); +} + +static void +scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { + uint64_t t = S->T + (S->leftover * 8); + + S->buffer[S->leftover] = 0x80; + if (S->leftover <= 55) { + memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover); + } else { + memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover); + sha256_blocks(S, S->buffer, 1); + memset(S->buffer, 0, 56); + } + + U64TO8_BE(S->buffer + 56, t); + sha256_blocks(S, S->buffer, 1); + + U32TO8_BE(&hash[ 0], S->H[0]); + U32TO8_BE(&hash[ 4], S->H[1]); + U32TO8_BE(&hash[ 8], S->H[2]); + U32TO8_BE(&hash[12], S->H[3]); + U32TO8_BE(&hash[16], S->H[4]); + U32TO8_BE(&hash[20], S->H[5]); + U32TO8_BE(&hash[24], S->H[6]); + U32TO8_BE(&hash[28], S->H[7]); +} + +static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 0xee,0x36,0xae,0xa6,0x65,0xf0,0x28,0x7d,0xc9,0xde,0xd8,0xad,0x48,0x33,0x7d,0xbf, + 0xcb,0xc0,0x48,0xfa,0x5f,0x92,0xfd,0x0a,0x95,0x6f,0x34,0x8e,0x8c,0x1e,0x73,0xad, +}; diff --git a/bf/scrypt-jane/code/scrypt-jane-hash_sha512.h b/bf/scrypt-jane/code/scrypt-jane-hash_sha512.h new file mode 100644 index 0000000..3e3997d --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-hash_sha512.h @@ -0,0 +1,152 @@ +#define SCRYPT_HASH "SHA-2-512" +#define SCRYPT_HASH_BLOCK_SIZE 128 +#define SCRYPT_HASH_DIGEST_SIZE 64 + +typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + +typedef struct scrypt_hash_state_t { + uint64_t H[8]; + uint64_t T[2]; + uint32_t leftover; + uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; +} scrypt_hash_state; + +static const uint64_t sha512_constants[80] = { + 0x428a2f98d728ae22ull, 0x7137449123ef65cdull, 0xb5c0fbcfec4d3b2full, 0xe9b5dba58189dbbcull, + 0x3956c25bf348b538ull, 0x59f111f1b605d019ull, 0x923f82a4af194f9bull, 0xab1c5ed5da6d8118ull, + 0xd807aa98a3030242ull, 0x12835b0145706fbeull, 0x243185be4ee4b28cull, 
0x550c7dc3d5ffb4e2ull, + 0x72be5d74f27b896full, 0x80deb1fe3b1696b1ull, 0x9bdc06a725c71235ull, 0xc19bf174cf692694ull, + 0xe49b69c19ef14ad2ull, 0xefbe4786384f25e3ull, 0x0fc19dc68b8cd5b5ull, 0x240ca1cc77ac9c65ull, + 0x2de92c6f592b0275ull, 0x4a7484aa6ea6e483ull, 0x5cb0a9dcbd41fbd4ull, 0x76f988da831153b5ull, + 0x983e5152ee66dfabull, 0xa831c66d2db43210ull, 0xb00327c898fb213full, 0xbf597fc7beef0ee4ull, + 0xc6e00bf33da88fc2ull, 0xd5a79147930aa725ull, 0x06ca6351e003826full, 0x142929670a0e6e70ull, + 0x27b70a8546d22ffcull, 0x2e1b21385c26c926ull, 0x4d2c6dfc5ac42aedull, 0x53380d139d95b3dfull, + 0x650a73548baf63deull, 0x766a0abb3c77b2a8ull, 0x81c2c92e47edaee6ull, 0x92722c851482353bull, + 0xa2bfe8a14cf10364ull, 0xa81a664bbc423001ull, 0xc24b8b70d0f89791ull, 0xc76c51a30654be30ull, + 0xd192e819d6ef5218ull, 0xd69906245565a910ull, 0xf40e35855771202aull, 0x106aa07032bbd1b8ull, + 0x19a4c116b8d2d0c8ull, 0x1e376c085141ab53ull, 0x2748774cdf8eeb99ull, 0x34b0bcb5e19b48a8ull, + 0x391c0cb3c5c95a63ull, 0x4ed8aa4ae3418acbull, 0x5b9cca4f7763e373ull, 0x682e6ff3d6b2b8a3ull, + 0x748f82ee5defb2fcull, 0x78a5636f43172f60ull, 0x84c87814a1f0ab72ull, 0x8cc702081a6439ecull, + 0x90befffa23631e28ull, 0xa4506cebde82bde9ull, 0xbef9a3f7b2c67915ull, 0xc67178f2e372532bull, + 0xca273eceea26619cull, 0xd186b8c721c0c207ull, 0xeada7dd6cde0eb1eull, 0xf57d4f7fee6ed178ull, + 0x06f067aa72176fbaull, 0x0a637dc5a2c898a6ull, 0x113f9804bef90daeull, 0x1b710b35131c471bull, + 0x28db77f523047d84ull, 0x32caab7b40c72493ull, 0x3c9ebe0a15c9bebcull, 0x431d67c49c100d4cull, + 0x4cc5d4becb3e42b6ull, 0x597f299cfc657e2aull, 0x5fcb6fab3ad6faecull, 0x6c44198c4a475817ull +}; + +#define Ch(x,y,z) (z ^ (x & (y ^ z))) +#define Maj(x,y,z) (((x | y) & z) | (x & y)) +#define S0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39)) +#define S1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41)) +#define G0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ (x >> 7)) +#define G1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ (x >> 6)) +#define W0(in,i) (U8TO64_BE(&in[i * 8])) +#define W1(i) (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16]) +#define STEP(i) \ + t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \ + t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha512_constants[i] + w[i]; \ + r[7] = r[6]; \ + r[6] = r[5]; \ + r[5] = r[4]; \ + r[4] = r[3] + t0; \ + r[3] = r[2]; \ + r[2] = r[1]; \ + r[1] = r[0]; \ + r[0] = t0 + t1; + +static void +sha512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) { + uint64_t r[8], w[80], t0, t1; + size_t i; + + for (i = 0; i < 8; i++) r[i] = S->H[i]; + + while (blocks--) { + for (i = 0; i < 16; i++) { w[i] = W0(in, i); } + for (i = 16; i < 80; i++) { w[i] = W1(i); } + for (i = 0; i < 80; i++) { STEP(i); } + for (i = 0; i < 8; i++) { r[i] += S->H[i]; S->H[i] = r[i]; } + S->T[0] += SCRYPT_HASH_BLOCK_SIZE * 8; + S->T[1] += (!S->T[0]) ? 1 : 0; + in += SCRYPT_HASH_BLOCK_SIZE; + } +} + +static void +scrypt_hash_init(scrypt_hash_state *S) { + S->H[0] = 0x6a09e667f3bcc908ull; + S->H[1] = 0xbb67ae8584caa73bull; + S->H[2] = 0x3c6ef372fe94f82bull; + S->H[3] = 0xa54ff53a5f1d36f1ull; + S->H[4] = 0x510e527fade682d1ull; + S->H[5] = 0x9b05688c2b3e6c1full; + S->H[6] = 0x1f83d9abfb41bd6bull; + S->H[7] = 0x5be0cd19137e2179ull; + S->T[0] = 0; + S->T[1] = 0; + S->leftover = 0; +} + +static void +scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { + size_t blocks, want; + + /* handle the previous data */ + if (S->leftover) { + want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); + want = (want < inlen) ? 
want : inlen; + memcpy(S->buffer + S->leftover, in, want); + S->leftover += (uint32_t)want; + if (S->leftover < SCRYPT_HASH_BLOCK_SIZE) + return; + in += want; + inlen -= want; + sha512_blocks(S, S->buffer, 1); + } + + /* handle the current data */ + blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1)); + S->leftover = (uint32_t)(inlen - blocks); + if (blocks) { + sha512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE); + in += blocks; + } + + /* handle leftover data */ + if (S->leftover) + memcpy(S->buffer, in, S->leftover); +} + +static void +scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { + uint64_t t0 = S->T[0] + (S->leftover * 8), t1 = S->T[1]; + + S->buffer[S->leftover] = 0x80; + if (S->leftover <= 111) { + memset(S->buffer + S->leftover + 1, 0, 111 - S->leftover); + } else { + memset(S->buffer + S->leftover + 1, 0, 127 - S->leftover); + sha512_blocks(S, S->buffer, 1); + memset(S->buffer, 0, 112); + } + + U64TO8_BE(S->buffer + 112, t1); + U64TO8_BE(S->buffer + 120, t0); + sha512_blocks(S, S->buffer, 1); + + U64TO8_BE(&hash[ 0], S->H[0]); + U64TO8_BE(&hash[ 8], S->H[1]); + U64TO8_BE(&hash[16], S->H[2]); + U64TO8_BE(&hash[24], S->H[3]); + U64TO8_BE(&hash[32], S->H[4]); + U64TO8_BE(&hash[40], S->H[5]); + U64TO8_BE(&hash[48], S->H[6]); + U64TO8_BE(&hash[56], S->H[7]); +} + +static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 0xba,0xc3,0x80,0x2b,0x24,0x56,0x95,0x1f,0x19,0x7c,0xa2,0xd3,0x72,0x7c,0x9a,0x4d, + 0x1d,0x50,0x3a,0xa9,0x12,0x27,0xd8,0xe1,0xbe,0x76,0x53,0x87,0x5a,0x1e,0x82,0xec, + 0xc8,0xe1,0x6b,0x87,0xd0,0xb5,0x25,0x7e,0xe8,0x1e,0xd7,0x58,0xc6,0x2d,0xc2,0x9c, + 0x06,0x31,0x8f,0x5b,0x57,0x8e,0x76,0xba,0xd5,0xf6,0xec,0xfe,0x85,0x1f,0x34,0x0c, +}; diff --git a/bf/scrypt-jane/code/scrypt-jane-hash_skein512.h b/bf/scrypt-jane/code/scrypt-jane-hash_skein512.h new file mode 100644 index 0000000..736d893 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-hash_skein512.h @@ -0,0 +1,188 @@ +#define SCRYPT_HASH "Skein-512" +#define SCRYPT_HASH_BLOCK_SIZE 64 +#define SCRYPT_HASH_DIGEST_SIZE 64 + +typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + +typedef struct scrypt_hash_state_t { + uint64_t X[8], T[2]; + uint32_t leftover; + uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; +} scrypt_hash_state; + +#include + +static void +skein512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks, size_t add) { + uint64_t X[8], key[8], Xt[9+18], T[3+1]; + size_t r; + + while (blocks--) { + T[0] = S->T[0] + add; + T[1] = S->T[1]; + T[2] = T[0] ^ T[1]; + key[0] = U8TO64_LE(in + 0); Xt[0] = S->X[0]; X[0] = key[0] + Xt[0]; + key[1] = U8TO64_LE(in + 8); Xt[1] = S->X[1]; X[1] = key[1] + Xt[1]; + key[2] = U8TO64_LE(in + 16); Xt[2] = S->X[2]; X[2] = key[2] + Xt[2]; + key[3] = U8TO64_LE(in + 24); Xt[3] = S->X[3]; X[3] = key[3] + Xt[3]; + key[4] = U8TO64_LE(in + 32); Xt[4] = S->X[4]; X[4] = key[4] + Xt[4]; + key[5] = U8TO64_LE(in + 40); Xt[5] = S->X[5]; X[5] = key[5] + Xt[5] + T[0]; + key[6] = U8TO64_LE(in + 48); Xt[6] = S->X[6]; X[6] = key[6] + Xt[6] + T[1]; + key[7] = U8TO64_LE(in + 56); Xt[7] = S->X[7]; X[7] = key[7] + Xt[7]; + Xt[8] = 0x1BD11BDAA9FC1A22ull ^ Xt[0] ^ Xt[1] ^ Xt[2] ^ Xt[3] ^ Xt[4] ^ Xt[5] ^ Xt[6] ^ Xt[7]; + in += SCRYPT_HASH_BLOCK_SIZE; + + for (r = 0; r < 18; r++) + Xt[r + 9] = Xt[r + 0]; + + for (r = 0; r < 18; r += 2) { + X[0] += X[1]; X[1] = ROTL64(X[1], 46) ^ X[0]; + X[2] += X[3]; X[3] = ROTL64(X[3], 36) ^ X[2]; + X[4] += X[5]; X[5] = ROTL64(X[5], 19) ^ X[4]; + X[6] += X[7]; X[7] = ROTL64(X[7], 37) ^ X[6]; + X[2] += X[1]; X[1] = ROTL64(X[1], 33) 
^ X[2]; + X[0] += X[3]; X[3] = ROTL64(X[3], 42) ^ X[0]; + X[6] += X[5]; X[5] = ROTL64(X[5], 14) ^ X[6]; + X[4] += X[7]; X[7] = ROTL64(X[7], 27) ^ X[4]; + X[4] += X[1]; X[1] = ROTL64(X[1], 17) ^ X[4]; + X[6] += X[3]; X[3] = ROTL64(X[3], 49) ^ X[6]; + X[0] += X[5]; X[5] = ROTL64(X[5], 36) ^ X[0]; + X[2] += X[7]; X[7] = ROTL64(X[7], 39) ^ X[2]; + X[6] += X[1]; X[1] = ROTL64(X[1], 44) ^ X[6]; + X[4] += X[3]; X[3] = ROTL64(X[3], 56) ^ X[4]; + X[2] += X[5]; X[5] = ROTL64(X[5], 54) ^ X[2]; + X[0] += X[7]; X[7] = ROTL64(X[7], 9) ^ X[0]; + + X[0] += Xt[r + 1]; + X[1] += Xt[r + 2]; + X[2] += Xt[r + 3]; + X[3] += Xt[r + 4]; + X[4] += Xt[r + 5]; + X[5] += Xt[r + 6] + T[1]; + X[6] += Xt[r + 7] + T[2]; + X[7] += Xt[r + 8] + r + 1; + + T[3] = T[0]; + T[0] = T[1]; + T[1] = T[2]; + T[2] = T[3]; + + X[0] += X[1]; X[1] = ROTL64(X[1], 39) ^ X[0]; + X[2] += X[3]; X[3] = ROTL64(X[3], 30) ^ X[2]; + X[4] += X[5]; X[5] = ROTL64(X[5], 34) ^ X[4]; + X[6] += X[7]; X[7] = ROTL64(X[7], 24) ^ X[6]; + X[2] += X[1]; X[1] = ROTL64(X[1], 13) ^ X[2]; + X[0] += X[3]; X[3] = ROTL64(X[3], 17) ^ X[0]; + X[6] += X[5]; X[5] = ROTL64(X[5], 10) ^ X[6]; + X[4] += X[7]; X[7] = ROTL64(X[7], 50) ^ X[4]; + X[4] += X[1]; X[1] = ROTL64(X[1], 25) ^ X[4]; + X[6] += X[3]; X[3] = ROTL64(X[3], 29) ^ X[6]; + X[0] += X[5]; X[5] = ROTL64(X[5], 39) ^ X[0]; + X[2] += X[7]; X[7] = ROTL64(X[7], 43) ^ X[2]; + X[6] += X[1]; X[1] = ROTL64(X[1], 8) ^ X[6]; + X[4] += X[3]; X[3] = ROTL64(X[3], 22) ^ X[4]; + X[2] += X[5]; X[5] = ROTL64(X[5], 56) ^ X[2]; + X[0] += X[7]; X[7] = ROTL64(X[7], 35) ^ X[0]; + + X[0] += Xt[r + 2]; + X[1] += Xt[r + 3]; + X[2] += Xt[r + 4]; + X[3] += Xt[r + 5]; + X[4] += Xt[r + 6]; + X[5] += Xt[r + 7] + T[1]; + X[6] += Xt[r + 8] + T[2]; + X[7] += Xt[r + 9] + r + 2; + + T[3] = T[0]; + T[0] = T[1]; + T[1] = T[2]; + T[2] = T[3]; + } + + S->X[0] = key[0] ^ X[0]; + S->X[1] = key[1] ^ X[1]; + S->X[2] = key[2] ^ X[2]; + S->X[3] = key[3] ^ X[3]; + S->X[4] = key[4] ^ X[4]; + S->X[5] = key[5] ^ X[5]; + S->X[6] = key[6] ^ X[6]; + S->X[7] = key[7] ^ X[7]; + + S->T[0] = T[0]; + S->T[1] = T[1] & ~0x4000000000000000ull; + } +} + +static void +scrypt_hash_init(scrypt_hash_state *S) { + S->X[0] = 0x4903ADFF749C51CEull; + S->X[1] = 0x0D95DE399746DF03ull; + S->X[2] = 0x8FD1934127C79BCEull; + S->X[3] = 0x9A255629FF352CB1ull; + S->X[4] = 0x5DB62599DF6CA7B0ull; + S->X[5] = 0xEABE394CA9D5C3F4ull; + S->X[6] = 0x991112C71A75B523ull; + S->X[7] = 0xAE18A40B660FCC33ull; + S->T[0] = 0x0000000000000000ull; + S->T[1] = 0x7000000000000000ull; + S->leftover = 0; +} + +static void +scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { + size_t blocks, want; + + /* skein processes the final <=64 bytes raw, so we can only update if there are at least 64+1 bytes available */ + if ((S->leftover + inlen) > SCRYPT_HASH_BLOCK_SIZE) { + /* handle the previous data, we know there is enough for at least one block */ + if (S->leftover) { + want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); + memcpy(S->buffer + S->leftover, in, want); + in += want; + inlen -= want; + S->leftover = 0; + skein512_blocks(S, S->buffer, 1, SCRYPT_HASH_BLOCK_SIZE); + } + + /* handle the current data if there's more than one block */ + if (inlen > SCRYPT_HASH_BLOCK_SIZE) { + blocks = ((inlen - 1) & ~(SCRYPT_HASH_BLOCK_SIZE - 1)); + skein512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE, SCRYPT_HASH_BLOCK_SIZE); + inlen -= blocks; + in += blocks; + } + } + + /* handle leftover data */ + memcpy(S->buffer + S->leftover, in, inlen); + S->leftover += inlen; +} + +static void 
+scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { + memset(S->buffer + S->leftover, 0, SCRYPT_HASH_BLOCK_SIZE - S->leftover); + S->T[1] |= 0x8000000000000000ull; + skein512_blocks(S, S->buffer, 1, S->leftover); + + memset(S->buffer, 0, SCRYPT_HASH_BLOCK_SIZE); + S->T[0] = 0; + S->T[1] = 0xff00000000000000ull; + skein512_blocks(S, S->buffer, 1, 8); + + U64TO8_LE(&hash[ 0], S->X[0]); + U64TO8_LE(&hash[ 8], S->X[1]); + U64TO8_LE(&hash[16], S->X[2]); + U64TO8_LE(&hash[24], S->X[3]); + U64TO8_LE(&hash[32], S->X[4]); + U64TO8_LE(&hash[40], S->X[5]); + U64TO8_LE(&hash[48], S->X[6]); + U64TO8_LE(&hash[56], S->X[7]); +} + + +static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 0x4d,0x52,0x29,0xff,0x10,0xbc,0xd2,0x62,0xd1,0x61,0x83,0xc8,0xe6,0xf0,0x83,0xc4, + 0x9f,0xf5,0x6a,0x42,0x75,0x2a,0x26,0x4e,0xf0,0x28,0x72,0x28,0x47,0xe8,0x23,0xdf, + 0x1e,0x64,0xf1,0x51,0x38,0x35,0x9d,0xc2,0x83,0xfc,0x35,0x4e,0xc0,0x52,0x5f,0x41, + 0x6a,0x0b,0x7d,0xf5,0xce,0x98,0xde,0x6f,0x36,0xd8,0x51,0x15,0x78,0x78,0x93,0x67, +}; diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_chacha-avx.h b/bf/scrypt-jane/code/scrypt-jane-mix_chacha-avx.h new file mode 100644 index 0000000..ddd3ee1 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_chacha-avx.h @@ -0,0 +1,368 @@ +/* x86 */ +#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_CHACHA_AVX + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,64) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(mov ebx, 0x01000302) + a2(vmovd xmm4, ebx) + a2(mov ebx, 0x05040706) + a2(vmovd xmm0, ebx) + a2(mov ebx, 0x09080b0a) + a2(vmovd xmm1, ebx) + a2(mov ebx, 0x0d0c0f0e) + a2(vmovd xmm2, ebx) + a2(mov ebx, 0x02010003) + a2(vmovd xmm5, ebx) + a2(mov ebx, 0x06050407) + a2(vmovd xmm3, ebx) + a2(mov ebx, 0x0a09080b) + a2(vmovd xmm6, ebx) + a2(mov ebx, 0x0e0d0c0f) + a2(vmovd xmm7, ebx) + a3(vpunpckldq xmm4, xmm4, xmm0) + a3(vpunpckldq xmm5, xmm5, xmm3) + a3(vpunpckldq xmm1, xmm1, xmm2) + a3(vpunpckldq xmm6, xmm6, xmm7) + a3(vpunpcklqdq xmm4, xmm4, xmm1) + a3(vpunpcklqdq xmm5, xmm5, xmm6) + a2(vmovdqa xmm0,[ecx+esi+0]) + a2(vmovdqa xmm1,[ecx+esi+16]) + a2(vmovdqa xmm2,[ecx+esi+32]) + a2(vmovdqa xmm3,[ecx+esi+48]) + aj(jz scrypt_ChunkMix_avx_no_xor1) + a3(vpxor xmm0,xmm0,[ecx+eax+0]) + a3(vpxor xmm1,xmm1,[ecx+eax+16]) + a3(vpxor xmm2,xmm2,[ecx+eax+32]) + a3(vpxor xmm3,xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_avx_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_avx_loop:) + a2(and eax, eax) + a3(vpxor xmm0,xmm0,[esi+ecx+0]) + a3(vpxor xmm1,xmm1,[esi+ecx+16]) + a3(vpxor xmm2,xmm2,[esi+ecx+32]) + a3(vpxor xmm3,xmm3,[esi+ecx+48]) + aj(jz scrypt_ChunkMix_avx_no_xor2) + a3(vpxor xmm0,xmm0,[eax+ecx+0]) + a3(vpxor xmm1,xmm1,[eax+ecx+16]) + a3(vpxor xmm2,xmm2,[eax+ecx+32]) + a3(vpxor xmm3,xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_avx_no_xor2:) + a2(vmovdqa [esp+0],xmm0) + a2(vmovdqa [esp+16],xmm1) + a2(vmovdqa [esp+32],xmm2) + a2(vmovdqa [esp+48],xmm3) + a2(mov eax,8) + a1(scrypt_chacha_avx_loop: ) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm4) + 
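+	/* xmm4/xmm5 hold byte-shuffle masks, so the ChaCha rotations by 16 and 8 bits
+	   take a single vpshufb; the 12- and 7-bit rotations use two shifts plus xor */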
a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpsrld xmm6,xmm1,20) + a3(vpslld xmm1,xmm1,12) + a3(vpxor xmm1,xmm1,xmm6) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm5) + a3(vpshufd xmm0,xmm0,0x93) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpshufd xmm3,xmm3,0x4e) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpshufd xmm2,xmm2,0x39) + a3(vpsrld xmm6,xmm1,25) + a3(vpslld xmm1,xmm1,7) + a3(vpxor xmm1,xmm1,xmm6) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm4) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpsrld xmm6,xmm1,20) + a3(vpslld xmm1,xmm1,12) + a3(vpxor xmm1,xmm1,xmm6) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm5) + a3(vpshufd xmm0,xmm0,0x39) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpshufd xmm3,xmm3,0x4e) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpshufd xmm2,xmm2,0x93) + a3(vpsrld xmm6,xmm1,25) + a3(vpslld xmm1,xmm1,7) + a3(vpxor xmm1,xmm1,xmm6) + a2(sub eax,2) + aj(ja scrypt_chacha_avx_loop) + a3(vpaddd xmm0,xmm0,[esp+0]) + a3(vpaddd xmm1,xmm1,[esp+16]) + a3(vpaddd xmm2,xmm2,[esp+32]) + a3(vpaddd xmm3,xmm3,[esp+48]) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(vmovdqa [eax+0],xmm0) + a2(vmovdqa [eax+16],xmm1) + a2(vmovdqa [eax+32],xmm2) + a2(vmovdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + aj(jne scrypt_ChunkMix_avx_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + aret(16) +asm_naked_fn_end(scrypt_ChunkMix_avx) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_CHACHA_AVX + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(vmovdqa xmm0,[rax+0]) + a2(vmovdqa xmm1,[rax+16]) + a2(vmovdqa xmm2,[rax+32]) + a2(vmovdqa xmm3,[rax+48]) + a2(mov r8, 0x0504070601000302) + a2(mov rax, 0x0d0c0f0e09080b0a) + a2(movd xmm4, r8) + a2(movd xmm6, rax) + a2(mov r8, 0x0605040702010003) + a2(mov rax, 0x0e0d0c0f0a09080b) + a2(movd xmm5, r8) + a2(movd xmm7, rax) + a3(vpunpcklqdq xmm4, xmm4, xmm6) + a3(vpunpcklqdq xmm5, xmm5, xmm7) + aj(jz scrypt_ChunkMix_avx_no_xor1) + a3(vpxor xmm0,xmm0,[r9+0]) + a3(vpxor xmm1,xmm1,[r9+16]) + a3(vpxor xmm2,xmm2,[r9+32]) + a3(vpxor xmm3,xmm3,[r9+48]) + a1(scrypt_ChunkMix_avx_no_xor1:) + a2(xor r8,r8) + a2(xor r9,r9) + a1(scrypt_ChunkMix_avx_loop:) + a2(and rdx, rdx) + a3(vpxor xmm0,xmm0,[rsi+r9+0]) + a3(vpxor xmm1,xmm1,[rsi+r9+16]) + a3(vpxor xmm2,xmm2,[rsi+r9+32]) + a3(vpxor xmm3,xmm3,[rsi+r9+48]) + aj(jz scrypt_ChunkMix_avx_no_xor2) + a3(vpxor xmm0,xmm0,[rdx+r9+0]) + a3(vpxor xmm1,xmm1,[rdx+r9+16]) + a3(vpxor xmm2,xmm2,[rdx+r9+32]) + a3(vpxor xmm3,xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_avx_no_xor2:) + a2(vmovdqa xmm8,xmm0) + a2(vmovdqa xmm9,xmm1) + a2(vmovdqa xmm10,xmm2) + a2(vmovdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_chacha_avx_loop: ) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm4) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpsrld xmm12,xmm1,20) + a3(vpslld xmm1,xmm1,12) + a3(vpxor xmm1,xmm1,xmm12) + a3(vpaddd 
xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm5) + a3(vpshufd xmm0,xmm0,0x93) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpshufd xmm3,xmm3,0x4e) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpshufd xmm2,xmm2,0x39) + a3(vpsrld xmm12,xmm1,25) + a3(vpslld xmm1,xmm1,7) + a3(vpxor xmm1,xmm1,xmm12) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm4) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpsrld xmm12,xmm1,20) + a3(vpslld xmm1,xmm1,12) + a3(vpxor xmm1,xmm1,xmm12) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm5) + a3(vpshufd xmm0,xmm0,0x39) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpshufd xmm3,xmm3,0x4e) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpshufd xmm2,xmm2,0x93) + a3(vpsrld xmm12,xmm1,25) + a3(vpslld xmm1,xmm1,7) + a3(vpxor xmm1,xmm1,xmm12) + a2(sub rax,2) + aj(ja scrypt_chacha_avx_loop) + a3(vpaddd xmm0,xmm0,xmm8) + a3(vpaddd xmm1,xmm1,xmm9) + a3(vpaddd xmm2,xmm2,xmm10) + a3(vpaddd xmm3,xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(vmovdqa [rax+0],xmm0) + a2(vmovdqa [rax+16],xmm1) + a2(vmovdqa [rax+32],xmm2) + a2(vmovdqa [rax+48],xmm3) + aj(jne scrypt_ChunkMix_avx_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_avx) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_AVX + +static void asm_calling_convention NOINLINE +scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3; + const xmmi x4 = *(xmmi *)&ssse3_rotl16_32bit, x5 = *(xmmi *)&ssse3_rotl8_32bit; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x4); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x5); + x0 = _mm_shuffle_epi32(x0, 0x93); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x39); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x4); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x6 = x1; + x1 = 
_mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x5); + x0 = _mm_shuffle_epi32(x0, 0x39); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x93); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25)); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_CHACHA_AVX) + #undef SCRYPT_MIX + #define SCRYPT_MIX "ChaCha/8-AVX" + #undef SCRYPT_CHACHA_INCLUDED + #define SCRYPT_CHACHA_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_chacha-sse2.h b/bf/scrypt-jane/code/scrypt-jane-mix_chacha-sse2.h new file mode 100644 index 0000000..a8c2197 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_chacha-sse2.h @@ -0,0 +1,363 @@ +/* x86 */ +#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_CHACHA_SSE2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_sse2) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,16) + a2(and esp,~15) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(movdqa xmm0,[ecx+esi+0]) + a2(movdqa xmm1,[ecx+esi+16]) + a2(movdqa xmm2,[ecx+esi+32]) + a2(movdqa xmm3,[ecx+esi+48]) + aj(jz scrypt_ChunkMix_sse2_no_xor1) + a2(pxor xmm0,[ecx+eax+0]) + a2(pxor xmm1,[ecx+eax+16]) + a2(pxor xmm2,[ecx+eax+32]) + a2(pxor xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_sse2_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_sse2_loop:) + a2(and eax, eax) + a2(pxor xmm0,[esi+ecx+0]) + a2(pxor xmm1,[esi+ecx+16]) + a2(pxor xmm2,[esi+ecx+32]) + a2(pxor xmm3,[esi+ecx+48]) + aj(jz scrypt_ChunkMix_sse2_no_xor2) + a2(pxor xmm0,[eax+ecx+0]) + a2(pxor xmm1,[eax+ecx+16]) + a2(pxor xmm2,[eax+ecx+32]) + a2(pxor xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_sse2_no_xor2:) + a2(movdqa [esp+0],xmm0) + a2(movdqa xmm4,xmm1) + a2(movdqa xmm5,xmm2) + a2(movdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_chacha_sse2_loop: ) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a3(pshuflw xmm3,xmm3,0xb1) + a3(pshufhw xmm3,xmm3,0xb1) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,8) + a2(psrld xmm6,24) + a2(pxor xmm3,xmm6) + a3(pshufd xmm0,xmm0,0x93) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x39) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + a2(sub eax,2) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a3(pshuflw xmm3,xmm3,0xb1) + a3(pshufhw xmm3,xmm3,0xb1) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,8) 
+ a2(psrld xmm6,24) + a2(pxor xmm3,xmm6) + a3(pshufd xmm0,xmm0,0x39) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + aj(ja scrypt_chacha_sse2_loop) + a2(paddd xmm0,[esp+0]) + a2(paddd xmm1,xmm4) + a2(paddd xmm2,xmm5) + a2(paddd xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(movdqa [eax+0],xmm0) + a2(movdqa [eax+16],xmm1) + a2(movdqa [eax+32],xmm2) + a2(movdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + aj(jne scrypt_ChunkMix_sse2_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + aret(16) +asm_naked_fn_end(scrypt_ChunkMix_sse2) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_CHACHA_SSE2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_sse2) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(movdqa xmm0,[rax+0]) + a2(movdqa xmm1,[rax+16]) + a2(movdqa xmm2,[rax+32]) + a2(movdqa xmm3,[rax+48]) + aj(jz scrypt_ChunkMix_sse2_no_xor1) + a2(pxor xmm0,[r9+0]) + a2(pxor xmm1,[r9+16]) + a2(pxor xmm2,[r9+32]) + a2(pxor xmm3,[r9+48]) + a1(scrypt_ChunkMix_sse2_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_sse2_loop:) + a2(and rdx, rdx) + a2(pxor xmm0,[rsi+r9+0]) + a2(pxor xmm1,[rsi+r9+16]) + a2(pxor xmm2,[rsi+r9+32]) + a2(pxor xmm3,[rsi+r9+48]) + aj(jz scrypt_ChunkMix_sse2_no_xor2) + a2(pxor xmm0,[rdx+r9+0]) + a2(pxor xmm1,[rdx+r9+16]) + a2(pxor xmm2,[rdx+r9+32]) + a2(pxor xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_sse2_no_xor2:) + a2(movdqa xmm8,xmm0) + a2(movdqa xmm9,xmm1) + a2(movdqa xmm10,xmm2) + a2(movdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_chacha_sse2_loop: ) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a3(pshuflw xmm3,xmm3,0xb1) + a3(pshufhw xmm3,xmm3,0xb1) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,8) + a2(psrld xmm6,24) + a2(pxor xmm3,xmm6) + a3(pshufd xmm0,xmm0,0x93) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x39) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + a2(sub rax,2) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a3(pshuflw xmm3,xmm3,0xb1) + a3(pshufhw xmm3,xmm3,0xb1) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,8) + a2(psrld xmm6,24) + a2(pxor xmm3,xmm6) + a3(pshufd xmm0,xmm0,0x39) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + aj(ja scrypt_chacha_sse2_loop) + a2(paddd xmm0,xmm8) + a2(paddd xmm1,xmm9) + a2(paddd xmm2,xmm10) + a2(paddd xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + 
a2(add rax, rdi) + a2(cmp r9,rcx) + a2(movdqa [rax+0],xmm0) + a2(movdqa [rax+16],xmm1) + a2(movdqa [rax+32],xmm2) + a2(movdqa [rax+48],xmm3) + aj(jne scrypt_ChunkMix_sse2_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_sse2) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_SSE2 + +static void NOINLINE asm_calling_convention +scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,t0,t1,t2,t3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x4 = x3; + x3 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(x3, 0xb1), 0xb1); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x4 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x4, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x4 = x3; + x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x4, 24)); + x0 = _mm_shuffle_epi32(x0, 0x93); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x39); + x4 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x4, 25)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x4 = x3; + x3 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(x3, 0xb1), 0xb1); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x4 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x4, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x4 = x3; + x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x4, 24)); + x0 = _mm_shuffle_epi32(x0, 0x39); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x93); + x4 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x4, 25)); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + #undef SCRYPT_MIX + #define SCRYPT_MIX "ChaCha/8-SSE2" + #undef SCRYPT_CHACHA_INCLUDED + #define SCRYPT_CHACHA_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_chacha-ssse3.h 
b/bf/scrypt-jane/code/scrypt-jane-mix_chacha-ssse3.h new file mode 100644 index 0000000..894312e --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_chacha-ssse3.h @@ -0,0 +1,376 @@ +/* x86 */ +#if defined(X86ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_CHACHA_SSSE3 + +asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_ssse3) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,64) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(mov ebx, 0x01000302) + a2(movd xmm4, ebx) + a2(mov ebx, 0x05040706) + a2(movd xmm0, ebx) + a2(mov ebx, 0x09080b0a) + a2(movd xmm1, ebx) + a2(mov ebx, 0x0d0c0f0e) + a2(movd xmm2, ebx) + a2(mov ebx, 0x02010003) + a2(movd xmm5, ebx) + a2(mov ebx, 0x06050407) + a2(movd xmm3, ebx) + a2(mov ebx, 0x0a09080b) + a2(movd xmm6, ebx) + a2(mov ebx, 0x0e0d0c0f) + a2(movd xmm7, ebx) + a2(punpckldq xmm4, xmm0) + a2(punpckldq xmm5, xmm3) + a2(punpckldq xmm1, xmm2) + a2(punpckldq xmm6, xmm7) + a2(punpcklqdq xmm4, xmm1) + a2(punpcklqdq xmm5, xmm6) + a2(movdqa xmm0,[ecx+esi+0]) + a2(movdqa xmm1,[ecx+esi+16]) + a2(movdqa xmm2,[ecx+esi+32]) + a2(movdqa xmm3,[ecx+esi+48]) + aj(jz scrypt_ChunkMix_ssse3_no_xor1) + a2(pxor xmm0,[ecx+eax+0]) + a2(pxor xmm1,[ecx+eax+16]) + a2(pxor xmm2,[ecx+eax+32]) + a2(pxor xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_ssse3_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_ssse3_loop:) + a2(and eax, eax) + a2(pxor xmm0,[esi+ecx+0]) + a2(pxor xmm1,[esi+ecx+16]) + a2(pxor xmm2,[esi+ecx+32]) + a2(pxor xmm3,[esi+ecx+48]) + aj(jz scrypt_ChunkMix_ssse3_no_xor2) + a2(pxor xmm0,[eax+ecx+0]) + a2(pxor xmm1,[eax+ecx+16]) + a2(pxor xmm2,[eax+ecx+32]) + a2(pxor xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_ssse3_no_xor2:) + a2(movdqa [esp+0],xmm0) + a2(movdqa [esp+16],xmm1) + a2(movdqa [esp+32],xmm2) + a2(movdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_chacha_ssse3_loop: ) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm4) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm5) + a3(pshufd xmm0,xmm0,0x93) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x39) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + a2(sub eax,2) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm4) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm5) + a3(pshufd xmm0,xmm0,0x39) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + aj(ja scrypt_chacha_ssse3_loop) + a2(paddd xmm0,[esp+0]) + a2(paddd xmm1,[esp+16]) + a2(paddd xmm2,[esp+32]) + a2(paddd xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(movdqa [eax+0],xmm0) + a2(movdqa [eax+16],xmm1) + a2(movdqa [eax+32],xmm2) + 
a2(movdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + aj(jne scrypt_ChunkMix_ssse3_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + aret(16) +asm_naked_fn_end(scrypt_ChunkMix_ssse3) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_CHACHA_SSSE3 + +asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_ssse3) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(movdqa xmm0,[rax+0]) + a2(movdqa xmm1,[rax+16]) + a2(movdqa xmm2,[rax+32]) + a2(movdqa xmm3,[rax+48]) + a2(mov r8, 0x0504070601000302) + a2(mov rax, 0x0d0c0f0e09080b0a) + a2(movd xmm4, r8) + a2(movd xmm6, rax) + a2(mov r8, 0x0605040702010003) + a2(mov rax, 0x0e0d0c0f0a09080b) + a2(movd xmm5, r8) + a2(movd xmm7, rax) + a2(punpcklqdq xmm4, xmm6) + a2(punpcklqdq xmm5, xmm7) + aj(jz scrypt_ChunkMix_ssse3_no_xor1) + a2(pxor xmm0,[r9+0]) + a2(pxor xmm1,[r9+16]) + a2(pxor xmm2,[r9+32]) + a2(pxor xmm3,[r9+48]) + a1(scrypt_ChunkMix_ssse3_no_xor1:) + a2(xor r8,r8) + a2(xor r9,r9) + a1(scrypt_ChunkMix_ssse3_loop:) + a2(and rdx, rdx) + a2(pxor xmm0,[rsi+r9+0]) + a2(pxor xmm1,[rsi+r9+16]) + a2(pxor xmm2,[rsi+r9+32]) + a2(pxor xmm3,[rsi+r9+48]) + aj(jz scrypt_ChunkMix_ssse3_no_xor2) + a2(pxor xmm0,[rdx+r9+0]) + a2(pxor xmm1,[rdx+r9+16]) + a2(pxor xmm2,[rdx+r9+32]) + a2(pxor xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_ssse3_no_xor2:) + a2(movdqa xmm8,xmm0) + a2(movdqa xmm9,xmm1) + a2(movdqa xmm10,xmm2) + a2(movdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_chacha_ssse3_loop: ) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm4) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm12,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm12,20) + a2(pxor xmm1,xmm12) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm5) + a3(pshufd xmm0,xmm0,0x93) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x39) + a2(movdqa xmm12,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm12,25) + a2(pxor xmm1,xmm12) + a2(sub rax,2) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm4) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm12,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm12,20) + a2(pxor xmm1,xmm12) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm5) + a3(pshufd xmm0,xmm0,0x39) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a2(movdqa xmm12,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm12,25) + a2(pxor xmm1,xmm12) + aj(ja scrypt_chacha_ssse3_loop) + a2(paddd xmm0,xmm8) + a2(paddd xmm1,xmm9) + a2(paddd xmm2,xmm10) + a2(paddd xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(movdqa [rax+0],xmm0) + a2(movdqa [rax+16],xmm1) + a2(movdqa [rax+32],xmm2) + a2(movdqa [rax+48],xmm3) + aj(jne scrypt_ChunkMix_ssse3_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_ssse3) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_SSSE3 + +static void NOINLINE asm_calling_convention +scrypt_ChunkMix_ssse3(uint32_t 
*Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3; + const xmmi x4 = *(xmmi *)&ssse3_rotl16_32bit, x5 = *(xmmi *)&ssse3_rotl8_32bit; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x4); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x5); + x0 = _mm_shuffle_epi32(x0, 0x93); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x39); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x4); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x5); + x0 = _mm_shuffle_epi32(x0, 0x39); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x93); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25)); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_CHACHA_SSSE3) + #undef SCRYPT_MIX + #define SCRYPT_MIX "ChaCha/8-SSSE3" + #undef SCRYPT_CHACHA_INCLUDED + #define SCRYPT_CHACHA_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_chacha-xop.h b/bf/scrypt-jane/code/scrypt-jane-mix_chacha-xop.h new file mode 100644 index 0000000..4c25d88 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_chacha-xop.h @@ -0,0 +1,315 @@ +/* x86 */ +#if defined(X86ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_CHACHA_XOP + +asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_xop) + a1(push ebx) + a1(push edi) + a1(push esi) + 
a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,64) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(vmovdqa xmm0,[ecx+esi+0]) + a2(vmovdqa xmm1,[ecx+esi+16]) + a2(vmovdqa xmm2,[ecx+esi+32]) + a2(vmovdqa xmm3,[ecx+esi+48]) + aj(jz scrypt_ChunkMix_xop_no_xor1) + a3(vpxor xmm0,xmm0,[ecx+eax+0]) + a3(vpxor xmm1,xmm1,[ecx+eax+16]) + a3(vpxor xmm2,xmm2,[ecx+eax+32]) + a3(vpxor xmm3,xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_xop_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_xop_loop:) + a2(and eax, eax) + a3(vpxor xmm0,xmm0,[esi+ecx+0]) + a3(vpxor xmm1,xmm1,[esi+ecx+16]) + a3(vpxor xmm2,xmm2,[esi+ecx+32]) + a3(vpxor xmm3,xmm3,[esi+ecx+48]) + aj(jz scrypt_ChunkMix_xop_no_xor2) + a3(vpxor xmm0,xmm0,[eax+ecx+0]) + a3(vpxor xmm1,xmm1,[eax+ecx+16]) + a3(vpxor xmm2,xmm2,[eax+ecx+32]) + a3(vpxor xmm3,xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_xop_no_xor2:) + a2(vmovdqa xmm4,xmm0) + a2(vmovdqa xmm5,xmm1) + a2(vmovdqa xmm6,xmm2) + a2(vmovdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_chacha_xop_loop: ) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vprotd xmm3,xmm3,16) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vprotd xmm1,xmm1,12) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vprotd xmm3,xmm3,8) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpshufd xmm0,xmm0,0x93) + a3(vpxor xmm1,xmm1,xmm2) + a3(vprotd xmm1,xmm1,7) + a3(vpshufd xmm3,xmm3,0x4e) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpshufd xmm2,xmm2,0x39) + a3(vpxor xmm3,xmm3,xmm0) + a3(vprotd xmm3,xmm3,16) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vprotd xmm1,xmm1,12) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vprotd xmm3,xmm3,8) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpshufd xmm0,xmm0,0x39) + a3(vprotd xmm1,xmm1,7) + a3(pshufd xmm3,xmm3,0x4e) + a3(pshufd xmm2,xmm2,0x93) + a2(sub eax,2) + aj(ja scrypt_chacha_xop_loop) + a3(vpaddd xmm0,xmm0,xmm4) + a3(vpaddd xmm1,xmm1,xmm5) + a3(vpaddd xmm2,xmm2,xmm6) + a3(vpaddd xmm3,xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(vmovdqa [eax+0],xmm0) + a2(vmovdqa [eax+16],xmm1) + a2(vmovdqa [eax+32],xmm2) + a2(vmovdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + aj(jne scrypt_ChunkMix_xop_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + aret(16) +asm_naked_fn_end(scrypt_ChunkMix_xop) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_CHACHA_XOP + +asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_xop) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(vmovdqa xmm0,[rax+0]) + a2(vmovdqa xmm1,[rax+16]) + a2(vmovdqa xmm2,[rax+32]) + a2(vmovdqa xmm3,[rax+48]) + aj(jz scrypt_ChunkMix_xop_no_xor1) + a3(vpxor xmm0,xmm0,[r9+0]) + a3(vpxor xmm1,xmm1,[r9+16]) + a3(vpxor xmm2,xmm2,[r9+32]) + a3(vpxor xmm3,xmm3,[r9+48]) + a1(scrypt_ChunkMix_xop_no_xor1:) + a2(xor r8,r8) + a2(xor r9,r9) + a1(scrypt_ChunkMix_xop_loop:) + a2(and rdx, rdx) + a3(vpxor 
xmm0,xmm0,[rsi+r9+0]) + a3(vpxor xmm1,xmm1,[rsi+r9+16]) + a3(vpxor xmm2,xmm2,[rsi+r9+32]) + a3(vpxor xmm3,xmm3,[rsi+r9+48]) + aj(jz scrypt_ChunkMix_xop_no_xor2) + a3(vpxor xmm0,xmm0,[rdx+r9+0]) + a3(vpxor xmm1,xmm1,[rdx+r9+16]) + a3(vpxor xmm2,xmm2,[rdx+r9+32]) + a3(vpxor xmm3,xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_xop_no_xor2:) + a2(vmovdqa xmm4,xmm0) + a2(vmovdqa xmm5,xmm1) + a2(vmovdqa xmm6,xmm2) + a2(vmovdqa xmm7,xmm3) + a2(mov rax,8) + a1(scrypt_chacha_xop_loop: ) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vprotd xmm3,xmm3,16) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vprotd xmm1,xmm1,12) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vprotd xmm3,xmm3,8) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpshufd xmm0,xmm0,0x93) + a3(vpxor xmm1,xmm1,xmm2) + a3(vprotd xmm1,xmm1,7) + a3(vpshufd xmm3,xmm3,0x4e) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpshufd xmm2,xmm2,0x39) + a3(vpxor xmm3,xmm3,xmm0) + a3(vprotd xmm3,xmm3,16) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vprotd xmm1,xmm1,12) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vprotd xmm3,xmm3,8) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpshufd xmm0,xmm0,0x39) + a3(vprotd xmm1,xmm1,7) + a3(pshufd xmm3,xmm3,0x4e) + a3(pshufd xmm2,xmm2,0x93) + a2(sub rax,2) + aj(ja scrypt_chacha_xop_loop) + a3(vpaddd xmm0,xmm0,xmm4) + a3(vpaddd xmm1,xmm1,xmm5) + a3(vpaddd xmm2,xmm2,xmm6) + a3(vpaddd xmm3,xmm3,xmm7) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(vmovdqa [rax+0],xmm0) + a2(vmovdqa [rax+16],xmm1) + a2(vmovdqa [rax+32],xmm2) + a2(vmovdqa [rax+48],xmm3) + aj(jne scrypt_ChunkMix_xop_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_xop) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_XOP + +static void asm_calling_convention NOINLINE +scrypt_ChunkMix_xop(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_roti_epi32(x3, 16); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x1 = _mm_roti_epi32(x1, 12); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_roti_epi32(x3, 8); + x2 = _mm_add_epi32(x2, x3); + x0 = _mm_shuffle_epi32(x0, 0x93); + x1 = _mm_xor_si128(x1, x2); + x1 = 
_mm_roti_epi32(x1, 7); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x0 = _mm_add_epi32(x0, x1); + x2 = _mm_shuffle_epi32(x2, 0x39); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_roti_epi32(x3, 16); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x1 = _mm_roti_epi32(x1, 12); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_roti_epi32(x3, 8); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x0 = _mm_shuffle_epi32(x0, 0x39); + x1 = _mm_roti_epi32(x1, 7); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x2 = _mm_shuffle_epi32(x2, 0x93); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_CHACHA_XOP) + #undef SCRYPT_MIX + #define SCRYPT_MIX "ChaCha/8-XOP" + #undef SCRYPT_CHACHA_INCLUDED + #define SCRYPT_CHACHA_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_chacha.h b/bf/scrypt-jane/code/scrypt-jane-mix_chacha.h new file mode 100644 index 0000000..85ee9c1 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_chacha.h @@ -0,0 +1,69 @@ +#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED) + +#undef SCRYPT_MIX +#define SCRYPT_MIX "ChaCha20/8 Ref" + +#undef SCRYPT_CHACHA_INCLUDED +#define SCRYPT_CHACHA_INCLUDED +#define SCRYPT_CHACHA_BASIC + +static void +chacha_core_basic(uint32_t state[16]) { + size_t rounds = 8; + uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,t; + + x0 = state[0]; + x1 = state[1]; + x2 = state[2]; + x3 = state[3]; + x4 = state[4]; + x5 = state[5]; + x6 = state[6]; + x7 = state[7]; + x8 = state[8]; + x9 = state[9]; + x10 = state[10]; + x11 = state[11]; + x12 = state[12]; + x13 = state[13]; + x14 = state[14]; + x15 = state[15]; + + #define quarter(a,b,c,d) \ + a += b; t = d^a; d = ROTL32(t,16); \ + c += d; t = b^c; b = ROTL32(t,12); \ + a += b; t = d^a; d = ROTL32(t, 8); \ + c += d; t = b^c; b = ROTL32(t, 7); + + for (; rounds; rounds -= 2) { + quarter( x0, x4, x8,x12) + quarter( x1, x5, x9,x13) + quarter( x2, x6,x10,x14) + quarter( x3, x7,x11,x15) + quarter( x0, x5,x10,x15) + quarter( x1, x6,x11,x12) + quarter( x2, x7, x8,x13) + quarter( x3, x4, x9,x14) + } + + state[0] += x0; + state[1] += x1; + state[2] += x2; + state[3] += x3; + state[4] += x4; + state[5] += x5; + state[6] += x6; + state[7] += x7; + state[8] += x8; + state[9] += x9; + state[10] += x10; + state[11] += x11; + state[12] += x12; + state[13] += x13; + state[14] += x14; + state[15] += x15; + + #undef quarter +} + +#endif \ No newline at end of file diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa-avx.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa-avx.h new file mode 100644 index 0000000..259fae4 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa-avx.h @@ -0,0 +1,381 @@ +/* x86 */ +#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA_AVX + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub 
esp,32) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(movdqa xmm0,[ecx+esi+0]) + a2(movdqa xmm1,[ecx+esi+16]) + a2(movdqa xmm2,[ecx+esi+32]) + a2(movdqa xmm3,[ecx+esi+48]) + aj(jz scrypt_ChunkMix_avx_no_xor1) + a3(vpxor xmm0,xmm0,[ecx+eax+0]) + a3(vpxor xmm1,xmm1,[ecx+eax+16]) + a3(vpxor xmm2,xmm2,[ecx+eax+32]) + a3(vpxor xmm3,xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_avx_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_avx_loop:) + a2(and eax, eax) + a3(vpxor xmm0,xmm0,[esi+ecx+0]) + a3(vpxor xmm1,xmm1,[esi+ecx+16]) + a3(vpxor xmm2,xmm2,[esi+ecx+32]) + a3(vpxor xmm3,xmm3,[esi+ecx+48]) + aj(jz scrypt_ChunkMix_avx_no_xor2) + a3(vpxor xmm0,xmm0,[eax+ecx+0]) + a3(vpxor xmm1,xmm1,[eax+ecx+16]) + a3(vpxor xmm2,xmm2,[eax+ecx+32]) + a3(vpxor xmm3,xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_avx_no_xor2:) + a2(vmovdqa [esp+0],xmm0) + a2(vmovdqa [esp+16],xmm1) + a2(vmovdqa xmm6,xmm2) + a2(vmovdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_salsa_avx_loop: ) + a3(vpaddd xmm4, xmm1, xmm0) + a3(vpsrld xmm5, xmm4, 25) + a3(vpslld xmm4, xmm4, 7) + a3(vpxor xmm3, xmm3, xmm5) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm0, xmm3) + a3(vpsrld xmm5, xmm4, 23) + a3(vpslld xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm5) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm3, xmm2) + a3(vpsrld xmm5, xmm4, 19) + a3(vpslld xmm4, xmm4, 13) + a3(vpxor xmm1, xmm1, xmm5) + a3(vpshufd xmm3, xmm3, 0x93) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm2, xmm1) + a3(vpsrld xmm5, xmm4, 14) + a3(vpslld xmm4, xmm4, 18) + a3(vpxor xmm0, xmm0, xmm5) + a3(vpshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a3(vpaddd xmm4, xmm3, xmm0) + a3(vpshufd xmm1, xmm1, 0x39) + a3(vpsrld xmm5, xmm4, 25) + a3(vpslld xmm4, xmm4, 7) + a3(vpxor xmm1, xmm1, xmm5) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm0, xmm1) + a3(vpsrld xmm5, xmm4, 23) + a3(vpslld xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm5) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm1, xmm2) + a3(vpsrld xmm5, xmm4, 19) + a3(vpslld xmm4, xmm4, 13) + a3(vpxor xmm3, xmm3, xmm5) + a3(vpshufd xmm1, xmm1, 0x93) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm2, xmm3) + a3(vpsrld xmm5, xmm4, 14) + a3(vpslld xmm4, xmm4, 18) + a3(vpxor xmm0, xmm0, xmm5) + a3(vpshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a3(vpshufd xmm3, xmm3, 0x39) + a2(sub eax, 2) + aj(ja scrypt_salsa_avx_loop) + a3(vpaddd xmm0,xmm0,[esp+0]) + a3(vpaddd xmm1,xmm1,[esp+16]) + a3(vpaddd xmm2,xmm2,xmm6) + a3(vpaddd xmm3,xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(vmovdqa [eax+0],xmm0) + a2(vmovdqa [eax+16],xmm1) + a2(vmovdqa [eax+32],xmm2) + a2(vmovdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + aj(jne scrypt_ChunkMix_avx_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + aret(16) +asm_naked_fn_end(scrypt_ChunkMix_avx) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA_AVX + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + 
a2(vmovdqa xmm0,[rax+0]) + a2(vmovdqa xmm1,[rax+16]) + a2(vmovdqa xmm2,[rax+32]) + a2(vmovdqa xmm3,[rax+48]) + aj(jz scrypt_ChunkMix_avx_no_xor1) + a3(vpxor xmm0,xmm0,[r9+0]) + a3(vpxor xmm1,xmm1,[r9+16]) + a3(vpxor xmm2,xmm2,[r9+32]) + a3(vpxor xmm3,xmm3,[r9+48]) + a1(scrypt_ChunkMix_avx_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_avx_loop:) + a2(and rdx, rdx) + a3(vpxor xmm0,xmm0,[rsi+r9+0]) + a3(vpxor xmm1,xmm1,[rsi+r9+16]) + a3(vpxor xmm2,xmm2,[rsi+r9+32]) + a3(vpxor xmm3,xmm3,[rsi+r9+48]) + aj(jz scrypt_ChunkMix_avx_no_xor2) + a3(vpxor xmm0,xmm0,[rdx+r9+0]) + a3(vpxor xmm1,xmm1,[rdx+r9+16]) + a3(vpxor xmm2,xmm2,[rdx+r9+32]) + a3(vpxor xmm3,xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_avx_no_xor2:) + a2(vmovdqa xmm8,xmm0) + a2(vmovdqa xmm9,xmm1) + a2(vmovdqa xmm10,xmm2) + a2(vmovdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_salsa_avx_loop: ) + a3(vpaddd xmm4, xmm1, xmm0) + a3(vpsrld xmm5, xmm4, 25) + a3(vpslld xmm4, xmm4, 7) + a3(vpxor xmm3, xmm3, xmm5) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm0, xmm3) + a3(vpsrld xmm5, xmm4, 23) + a3(vpslld xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm5) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm3, xmm2) + a3(vpsrld xmm5, xmm4, 19) + a3(vpslld xmm4, xmm4, 13) + a3(vpxor xmm1, xmm1, xmm5) + a3(vpshufd xmm3, xmm3, 0x93) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm2, xmm1) + a3(vpsrld xmm5, xmm4, 14) + a3(vpslld xmm4, xmm4, 18) + a3(vpxor xmm0, xmm0, xmm5) + a3(vpshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a3(vpaddd xmm4, xmm3, xmm0) + a3(vpshufd xmm1, xmm1, 0x39) + a3(vpsrld xmm5, xmm4, 25) + a3(vpslld xmm4, xmm4, 7) + a3(vpxor xmm1, xmm1, xmm5) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm0, xmm1) + a3(vpsrld xmm5, xmm4, 23) + a3(vpslld xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm5) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm1, xmm2) + a3(vpsrld xmm5, xmm4, 19) + a3(vpslld xmm4, xmm4, 13) + a3(vpxor xmm3, xmm3, xmm5) + a3(vpshufd xmm1, xmm1, 0x93) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm2, xmm3) + a3(vpsrld xmm5, xmm4, 14) + a3(vpslld xmm4, xmm4, 18) + a3(vpxor xmm0, xmm0, xmm5) + a3(vpshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a3(vpshufd xmm3, xmm3, 0x39) + a2(sub rax, 2) + aj(ja scrypt_salsa_avx_loop) + a3(vpaddd xmm0,xmm0,xmm8) + a3(vpaddd xmm1,xmm1,xmm9) + a3(vpaddd xmm2,xmm2,xmm10) + a3(vpaddd xmm3,xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(vmovdqa [rax+0],xmm0) + a2(vmovdqa [rax+16],xmm1) + a2(vmovdqa [rax+32],xmm2) + a2(vmovdqa [rax+48],xmm3) + aj(jne scrypt_ChunkMix_avx_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_avx) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) + +#define SCRYPT_SALSA_AVX + +static void asm_calling_convention NOINLINE +scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; 
i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x4 = x1; + x4 = _mm_add_epi32(x4, x0); + x5 = x4; + x4 = _mm_slli_epi32(x4, 7); + x5 = _mm_srli_epi32(x5, 25); + x3 = _mm_xor_si128(x3, x4); + x4 = x0; + x3 = _mm_xor_si128(x3, x5); + x4 = _mm_add_epi32(x4, x3); + x5 = x4; + x4 = _mm_slli_epi32(x4, 9); + x5 = _mm_srli_epi32(x5, 23); + x2 = _mm_xor_si128(x2, x4); + x4 = x3; + x2 = _mm_xor_si128(x2, x5); + x3 = _mm_shuffle_epi32(x3, 0x93); + x4 = _mm_add_epi32(x4, x2); + x5 = x4; + x4 = _mm_slli_epi32(x4, 13); + x5 = _mm_srli_epi32(x5, 19); + x1 = _mm_xor_si128(x1, x4); + x4 = x2; + x1 = _mm_xor_si128(x1, x5); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x4 = _mm_add_epi32(x4, x1); + x5 = x4; + x4 = _mm_slli_epi32(x4, 18); + x5 = _mm_srli_epi32(x5, 14); + x0 = _mm_xor_si128(x0, x4); + x4 = x3; + x0 = _mm_xor_si128(x0, x5); + x1 = _mm_shuffle_epi32(x1, 0x39); + x4 = _mm_add_epi32(x4, x0); + x5 = x4; + x4 = _mm_slli_epi32(x4, 7); + x5 = _mm_srli_epi32(x5, 25); + x1 = _mm_xor_si128(x1, x4); + x4 = x0; + x1 = _mm_xor_si128(x1, x5); + x4 = _mm_add_epi32(x4, x1); + x5 = x4; + x4 = _mm_slli_epi32(x4, 9); + x5 = _mm_srli_epi32(x5, 23); + x2 = _mm_xor_si128(x2, x4); + x4 = x1; + x2 = _mm_xor_si128(x2, x5); + x1 = _mm_shuffle_epi32(x1, 0x93); + x4 = _mm_add_epi32(x4, x2); + x5 = x4; + x4 = _mm_slli_epi32(x4, 13); + x5 = _mm_srli_epi32(x5, 19); + x3 = _mm_xor_si128(x3, x4); + x4 = x2; + x3 = _mm_xor_si128(x3, x5); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x4 = _mm_add_epi32(x4, x3); + x5 = x4; + x4 = _mm_slli_epi32(x4, 18); + x5 = _mm_srli_epi32(x5, 14); + x0 = _mm_xor_si128(x0, x4); + x3 = _mm_shuffle_epi32(x3, 0x39); + x0 = _mm_xor_si128(x0, x5); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_SALSA_AVX) + /* uses salsa_core_tangle_sse2 */ + + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa/8-AVX" + #undef SCRYPT_SALSA_INCLUDED + #define SCRYPT_SALSA_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa-sse2.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa-sse2.h new file mode 100644 index 0000000..d7ef969 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa-sse2.h @@ -0,0 +1,443 @@ +/* x86 */ +#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA_SSE2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_sse2) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,32) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea 
ecx,[edx-64]) + a2(and eax, eax) + a2(movdqa xmm0,[ecx+esi+0]) + a2(movdqa xmm1,[ecx+esi+16]) + a2(movdqa xmm2,[ecx+esi+32]) + a2(movdqa xmm3,[ecx+esi+48]) + aj(jz scrypt_ChunkMix_sse2_no_xor1) + a2(pxor xmm0,[ecx+eax+0]) + a2(pxor xmm1,[ecx+eax+16]) + a2(pxor xmm2,[ecx+eax+32]) + a2(pxor xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_sse2_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_sse2_loop:) + a2(and eax, eax) + a2(pxor xmm0,[esi+ecx+0]) + a2(pxor xmm1,[esi+ecx+16]) + a2(pxor xmm2,[esi+ecx+32]) + a2(pxor xmm3,[esi+ecx+48]) + aj(jz scrypt_ChunkMix_sse2_no_xor2) + a2(pxor xmm0,[eax+ecx+0]) + a2(pxor xmm1,[eax+ecx+16]) + a2(pxor xmm2,[eax+ecx+32]) + a2(pxor xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_sse2_no_xor2:) + a2(movdqa [esp+0],xmm0) + a2(movdqa [esp+16],xmm1) + a2(movdqa xmm6,xmm2) + a2(movdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_salsa_sse2_loop: ) + a2(movdqa xmm4, xmm1) + a2(paddd xmm4, xmm0) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 7) + a2(psrld xmm5, 25) + a2(pxor xmm3, xmm4) + a2(movdqa xmm4, xmm0) + a2(pxor xmm3, xmm5) + a2(paddd xmm4, xmm3) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 9) + a2(psrld xmm5, 23) + a2(pxor xmm2, xmm4) + a2(movdqa xmm4, xmm3) + a2(pxor xmm2, xmm5) + a3(pshufd xmm3, xmm3, 0x93) + a2(paddd xmm4, xmm2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 13) + a2(psrld xmm5, 19) + a2(pxor xmm1, xmm4) + a2(movdqa xmm4, xmm2) + a2(pxor xmm1, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a2(paddd xmm4, xmm1) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 18) + a2(psrld xmm5, 14) + a2(pxor xmm0, xmm4) + a2(movdqa xmm4, xmm3) + a2(pxor xmm0, xmm5) + a3(pshufd xmm1, xmm1, 0x39) + a2(paddd xmm4, xmm0) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 7) + a2(psrld xmm5, 25) + a2(pxor xmm1, xmm4) + a2(movdqa xmm4, xmm0) + a2(pxor xmm1, xmm5) + a2(paddd xmm4, xmm1) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 9) + a2(psrld xmm5, 23) + a2(pxor xmm2, xmm4) + a2(movdqa xmm4, xmm1) + a2(pxor xmm2, xmm5) + a3(pshufd xmm1, xmm1, 0x93) + a2(paddd xmm4, xmm2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 13) + a2(psrld xmm5, 19) + a2(pxor xmm3, xmm4) + a2(movdqa xmm4, xmm2) + a2(pxor xmm3, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a2(paddd xmm4, xmm3) + a2(sub eax, 2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 18) + a2(psrld xmm5, 14) + a2(pxor xmm0, xmm4) + a3(pshufd xmm3, xmm3, 0x39) + a2(pxor xmm0, xmm5) + aj(ja scrypt_salsa_sse2_loop) + a2(paddd xmm0,[esp+0]) + a2(paddd xmm1,[esp+16]) + a2(paddd xmm2,xmm6) + a2(paddd xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(movdqa [eax+0],xmm0) + a2(movdqa [eax+16],xmm1) + a2(movdqa [eax+32],xmm2) + a2(movdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + aj(jne scrypt_ChunkMix_sse2_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + aret(16) +asm_naked_fn_end(scrypt_ChunkMix_sse2) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA_SSE2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_sse2) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(movdqa xmm0,[rax+0]) + a2(movdqa xmm1,[rax+16]) + a2(movdqa 
xmm2,[rax+32]) + a2(movdqa xmm3,[rax+48]) + aj(jz scrypt_ChunkMix_sse2_no_xor1) + a2(pxor xmm0,[r9+0]) + a2(pxor xmm1,[r9+16]) + a2(pxor xmm2,[r9+32]) + a2(pxor xmm3,[r9+48]) + a1(scrypt_ChunkMix_sse2_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_sse2_loop:) + a2(and rdx, rdx) + a2(pxor xmm0,[rsi+r9+0]) + a2(pxor xmm1,[rsi+r9+16]) + a2(pxor xmm2,[rsi+r9+32]) + a2(pxor xmm3,[rsi+r9+48]) + aj(jz scrypt_ChunkMix_sse2_no_xor2) + a2(pxor xmm0,[rdx+r9+0]) + a2(pxor xmm1,[rdx+r9+16]) + a2(pxor xmm2,[rdx+r9+32]) + a2(pxor xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_sse2_no_xor2:) + a2(movdqa xmm8,xmm0) + a2(movdqa xmm9,xmm1) + a2(movdqa xmm10,xmm2) + a2(movdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_salsa_sse2_loop: ) + a2(movdqa xmm4, xmm1) + a2(paddd xmm4, xmm0) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 7) + a2(psrld xmm5, 25) + a2(pxor xmm3, xmm4) + a2(movdqa xmm4, xmm0) + a2(pxor xmm3, xmm5) + a2(paddd xmm4, xmm3) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 9) + a2(psrld xmm5, 23) + a2(pxor xmm2, xmm4) + a2(movdqa xmm4, xmm3) + a2(pxor xmm2, xmm5) + a3(pshufd xmm3, xmm3, 0x93) + a2(paddd xmm4, xmm2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 13) + a2(psrld xmm5, 19) + a2(pxor xmm1, xmm4) + a2(movdqa xmm4, xmm2) + a2(pxor xmm1, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a2(paddd xmm4, xmm1) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 18) + a2(psrld xmm5, 14) + a2(pxor xmm0, xmm4) + a2(movdqa xmm4, xmm3) + a2(pxor xmm0, xmm5) + a3(pshufd xmm1, xmm1, 0x39) + a2(paddd xmm4, xmm0) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 7) + a2(psrld xmm5, 25) + a2(pxor xmm1, xmm4) + a2(movdqa xmm4, xmm0) + a2(pxor xmm1, xmm5) + a2(paddd xmm4, xmm1) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 9) + a2(psrld xmm5, 23) + a2(pxor xmm2, xmm4) + a2(movdqa xmm4, xmm1) + a2(pxor xmm2, xmm5) + a3(pshufd xmm1, xmm1, 0x93) + a2(paddd xmm4, xmm2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 13) + a2(psrld xmm5, 19) + a2(pxor xmm3, xmm4) + a2(movdqa xmm4, xmm2) + a2(pxor xmm3, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a2(paddd xmm4, xmm3) + a2(sub rax, 2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 18) + a2(psrld xmm5, 14) + a2(pxor xmm0, xmm4) + a3(pshufd xmm3, xmm3, 0x39) + a2(pxor xmm0, xmm5) + aj(ja scrypt_salsa_sse2_loop) + a2(paddd xmm0,xmm8) + a2(paddd xmm1,xmm9) + a2(paddd xmm2,xmm10) + a2(paddd xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(movdqa [rax+0],xmm0) + a2(movdqa [rax+16],xmm1) + a2(movdqa [rax+32],xmm2) + a2(movdqa [rax+48],xmm3) + aj(jne scrypt_ChunkMix_sse2_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_sse2) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) + +#define SCRYPT_SALSA_SSE2 + +static void NOINLINE asm_calling_convention +scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) 
*/ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x4 = x1; + x4 = _mm_add_epi32(x4, x0); + x5 = x4; + x4 = _mm_slli_epi32(x4, 7); + x5 = _mm_srli_epi32(x5, 25); + x3 = _mm_xor_si128(x3, x4); + x4 = x0; + x3 = _mm_xor_si128(x3, x5); + x4 = _mm_add_epi32(x4, x3); + x5 = x4; + x4 = _mm_slli_epi32(x4, 9); + x5 = _mm_srli_epi32(x5, 23); + x2 = _mm_xor_si128(x2, x4); + x4 = x3; + x2 = _mm_xor_si128(x2, x5); + x3 = _mm_shuffle_epi32(x3, 0x93); + x4 = _mm_add_epi32(x4, x2); + x5 = x4; + x4 = _mm_slli_epi32(x4, 13); + x5 = _mm_srli_epi32(x5, 19); + x1 = _mm_xor_si128(x1, x4); + x4 = x2; + x1 = _mm_xor_si128(x1, x5); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x4 = _mm_add_epi32(x4, x1); + x5 = x4; + x4 = _mm_slli_epi32(x4, 18); + x5 = _mm_srli_epi32(x5, 14); + x0 = _mm_xor_si128(x0, x4); + x4 = x3; + x0 = _mm_xor_si128(x0, x5); + x1 = _mm_shuffle_epi32(x1, 0x39); + x4 = _mm_add_epi32(x4, x0); + x5 = x4; + x4 = _mm_slli_epi32(x4, 7); + x5 = _mm_srli_epi32(x5, 25); + x1 = _mm_xor_si128(x1, x4); + x4 = x0; + x1 = _mm_xor_si128(x1, x5); + x4 = _mm_add_epi32(x4, x1); + x5 = x4; + x4 = _mm_slli_epi32(x4, 9); + x5 = _mm_srli_epi32(x5, 23); + x2 = _mm_xor_si128(x2, x4); + x4 = x1; + x2 = _mm_xor_si128(x2, x5); + x1 = _mm_shuffle_epi32(x1, 0x93); + x4 = _mm_add_epi32(x4, x2); + x5 = x4; + x4 = _mm_slli_epi32(x4, 13); + x5 = _mm_srli_epi32(x5, 19); + x3 = _mm_xor_si128(x3, x4); + x4 = x2; + x3 = _mm_xor_si128(x3, x5); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x4 = _mm_add_epi32(x4, x3); + x5 = x4; + x4 = _mm_slli_epi32(x4, 18); + x5 = _mm_srli_epi32(x5, 14); + x0 = _mm_xor_si128(x0, x4); + x3 = _mm_shuffle_epi32(x3, 0x39); + x0 = _mm_xor_si128(x0, x5); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_SALSA_SSE2) + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa/8-SSE2" + #undef SCRYPT_SALSA_INCLUDED + #define SCRYPT_SALSA_INCLUDED +#endif + +/* used by avx,etc as well */ +#if defined(SCRYPT_SALSA_INCLUDED) + /* + Default layout: + 0 1 2 3 + 4 5 6 7 + 8 9 10 11 + 12 13 14 15 + + SSE2 layout: + 0 5 10 15 + 12 1 6 11 + 8 13 2 7 + 4 9 14 3 + */ + + static void asm_calling_convention + salsa_core_tangle_sse2(uint32_t *blocks, size_t count) { + uint32_t t; + while (count--) { + t = blocks[1]; blocks[1] = blocks[5]; blocks[5] = t; + t = blocks[2]; blocks[2] = blocks[10]; blocks[10] = t; + t = blocks[3]; blocks[3] = blocks[15]; blocks[15] = t; + t = blocks[4]; blocks[4] = blocks[12]; blocks[12] = t; + t = blocks[7]; blocks[7] = blocks[11]; blocks[11] = t; + t = blocks[9]; blocks[9] = blocks[13]; blocks[13] = t; + blocks += 16; + } + } +#endif + diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa-xop.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa-xop.h new file mode 100644 index 0000000..1d014d2 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa-xop.h @@ -0,0 +1,317 @@ +/* x86 */ +#if 
defined(X86ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA_XOP + +asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_xop) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,32) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(movdqa xmm0,[ecx+esi+0]) + a2(movdqa xmm1,[ecx+esi+16]) + a2(movdqa xmm2,[ecx+esi+32]) + a2(movdqa xmm3,[ecx+esi+48]) + aj(jz scrypt_ChunkMix_xop_no_xor1) + a3(vpxor xmm0,xmm0,[ecx+eax+0]) + a3(vpxor xmm1,xmm1,[ecx+eax+16]) + a3(vpxor xmm2,xmm2,[ecx+eax+32]) + a3(vpxor xmm3,xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_xop_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_xop_loop:) + a2(and eax, eax) + a3(vpxor xmm0,xmm0,[esi+ecx+0]) + a3(vpxor xmm1,xmm1,[esi+ecx+16]) + a3(vpxor xmm2,xmm2,[esi+ecx+32]) + a3(vpxor xmm3,xmm3,[esi+ecx+48]) + aj(jz scrypt_ChunkMix_xop_no_xor2) + a3(vpxor xmm0,xmm0,[eax+ecx+0]) + a3(vpxor xmm1,xmm1,[eax+ecx+16]) + a3(vpxor xmm2,xmm2,[eax+ecx+32]) + a3(vpxor xmm3,xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_xop_no_xor2:) + a2(vmovdqa [esp+0],xmm0) + a2(vmovdqa [esp+16],xmm1) + a2(vmovdqa xmm6,xmm2) + a2(vmovdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_salsa_xop_loop: ) + a3(vpaddd xmm4, xmm1, xmm0) + a3(vprotd xmm4, xmm4, 7) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm0, xmm3) + a3(vprotd xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm3, xmm2) + a3(vprotd xmm4, xmm4, 13) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm2, xmm1) + a3(pshufd xmm3, xmm3, 0x93) + a3(vprotd xmm4, xmm4, 18) + a3(pshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a3(pshufd xmm1, xmm1, 0x39) + a3(vpaddd xmm4, xmm3, xmm0) + a3(vprotd xmm4, xmm4, 7) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm0, xmm1) + a3(vprotd xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm1, xmm2) + a3(vprotd xmm4, xmm4, 13) + a3(vpxor xmm3, xmm3, xmm4) + a3(pshufd xmm1, xmm1, 0x93) + a3(vpaddd xmm4, xmm2, xmm3) + a3(pshufd xmm2, xmm2, 0x4e) + a3(vprotd xmm4, xmm4, 18) + a3(pshufd xmm3, xmm3, 0x39) + a3(vpxor xmm0, xmm0, xmm4) + a2(sub eax, 2) + aj(ja scrypt_salsa_xop_loop) + a3(vpaddd xmm0,xmm0,[esp+0]) + a3(vpaddd xmm1,xmm1,[esp+16]) + a3(vpaddd xmm2,xmm2,xmm6) + a3(vpaddd xmm3,xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(vmovdqa [eax+0],xmm0) + a2(vmovdqa [eax+16],xmm1) + a2(vmovdqa [eax+32],xmm2) + a2(vmovdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + aj(jne scrypt_ChunkMix_xop_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + aret(16) +asm_naked_fn_end(scrypt_ChunkMix_xop) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA_XOP + +asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_xop) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + 
a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(vmovdqa xmm0,[rax+0]) + a2(vmovdqa xmm1,[rax+16]) + a2(vmovdqa xmm2,[rax+32]) + a2(vmovdqa xmm3,[rax+48]) + aj(jz scrypt_ChunkMix_xop_no_xor1) + a3(vpxor xmm0,xmm0,[r9+0]) + a3(vpxor xmm1,xmm1,[r9+16]) + a3(vpxor xmm2,xmm2,[r9+32]) + a3(vpxor xmm3,xmm3,[r9+48]) + a1(scrypt_ChunkMix_xop_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_xop_loop:) + a2(and rdx, rdx) + a3(vpxor xmm0,xmm0,[rsi+r9+0]) + a3(vpxor xmm1,xmm1,[rsi+r9+16]) + a3(vpxor xmm2,xmm2,[rsi+r9+32]) + a3(vpxor xmm3,xmm3,[rsi+r9+48]) + aj(jz scrypt_ChunkMix_xop_no_xor2) + a3(vpxor xmm0,xmm0,[rdx+r9+0]) + a3(vpxor xmm1,xmm1,[rdx+r9+16]) + a3(vpxor xmm2,xmm2,[rdx+r9+32]) + a3(vpxor xmm3,xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_xop_no_xor2:) + a2(vmovdqa xmm8,xmm0) + a2(vmovdqa xmm9,xmm1) + a2(vmovdqa xmm10,xmm2) + a2(vmovdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_salsa_xop_loop: ) + a3(vpaddd xmm4, xmm1, xmm0) + a3(vprotd xmm4, xmm4, 7) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm0, xmm3) + a3(vprotd xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm3, xmm2) + a3(vprotd xmm4, xmm4, 13) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm2, xmm1) + a3(pshufd xmm3, xmm3, 0x93) + a3(vprotd xmm4, xmm4, 18) + a3(pshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a3(pshufd xmm1, xmm1, 0x39) + a3(vpaddd xmm4, xmm3, xmm0) + a3(vprotd xmm4, xmm4, 7) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm0, xmm1) + a3(vprotd xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm1, xmm2) + a3(vprotd xmm4, xmm4, 13) + a3(vpxor xmm3, xmm3, xmm4) + a3(pshufd xmm1, xmm1, 0x93) + a3(vpaddd xmm4, xmm2, xmm3) + a3(pshufd xmm2, xmm2, 0x4e) + a3(vprotd xmm4, xmm4, 18) + a3(pshufd xmm3, xmm3, 0x39) + a3(vpxor xmm0, xmm0, xmm4) + a2(sub rax, 2) + aj(ja scrypt_salsa_xop_loop) + a3(vpaddd xmm0,xmm0,xmm8) + a3(vpaddd xmm1,xmm1,xmm9) + a3(vpaddd xmm2,xmm2,xmm10) + a3(vpaddd xmm3,xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(vmovdqa [rax+0],xmm0) + a2(vmovdqa [rax+16],xmm1) + a2(vmovdqa [rax+32],xmm2) + a2(vmovdqa [rax+48],xmm3) + aj(jne scrypt_ChunkMix_xop_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_xop) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) + +#define SCRYPT_SALSA_XOP + +static void asm_calling_convention NOINLINE +scrypt_ChunkMix_xop(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, 
xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x4 = _mm_add_epi32(x1, x0); + x4 = _mm_roti_epi32(x4, 7); + x3 = _mm_xor_si128(x3, x4); + x4 = _mm_add_epi32(x0, x3); + x4 = _mm_roti_epi32(x4, 9); + x2 = _mm_xor_si128(x2, x4); + x4 = _mm_add_epi32(x3, x2); + x4 = _mm_roti_epi32(x4, 13); + x1 = _mm_xor_si128(x1, x4); + x4 = _mm_add_epi32(x2, x1); + x4 = _mm_roti_epi32(x4, 18); + x0 = _mm_xor_si128(x0, x4); + x3 = _mm_shuffle_epi32(x3, 0x93); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x1 = _mm_shuffle_epi32(x1, 0x39); + x4 = _mm_add_epi32(x3, x0); + x4 = _mm_roti_epi32(x4, 7); + x1 = _mm_xor_si128(x1, x4); + x4 = _mm_add_epi32(x0, x1); + x4 = _mm_roti_epi32(x4, 9); + x2 = _mm_xor_si128(x2, x4); + x4 = _mm_add_epi32(x1, x2); + x4 = _mm_roti_epi32(x4, 13); + x3 = _mm_xor_si128(x3, x4); + x4 = _mm_add_epi32(x2, x3); + x4 = _mm_roti_epi32(x4, 18); + x0 = _mm_xor_si128(x0, x4); + x1 = _mm_shuffle_epi32(x1, 0x93); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x3 = _mm_shuffle_epi32(x3, 0x39); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_SALSA_XOP) + /* uses salsa_core_tangle_sse2 */ + + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa/8-XOP" + #undef SCRYPT_SALSA_INCLUDED + #define SCRYPT_SALSA_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa.h new file mode 100644 index 0000000..33f3340 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa.h @@ -0,0 +1,70 @@ +#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED) + +#undef SCRYPT_MIX +#define SCRYPT_MIX "Salsa20/8 Ref" + +#undef SCRYPT_SALSA_INCLUDED +#define SCRYPT_SALSA_INCLUDED +#define SCRYPT_SALSA_BASIC + +static void +salsa_core_basic(uint32_t state[16]) { + size_t rounds = 8; + uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,t; + + x0 = state[0]; + x1 = state[1]; + x2 = state[2]; + x3 = state[3]; + x4 = state[4]; + x5 = state[5]; + x6 = state[6]; + x7 = state[7]; + x8 = state[8]; + x9 = state[9]; + x10 = state[10]; + x11 = state[11]; + x12 = state[12]; + x13 = state[13]; + x14 = state[14]; + x15 = state[15]; + + #define quarter(a,b,c,d) \ + t = a+d; t = ROTL32(t, 7); b ^= t; \ + t = b+a; t = ROTL32(t, 9); c ^= t; \ + t = c+b; t = ROTL32(t, 13); d ^= t; \ + t = d+c; t = ROTL32(t, 18); a ^= t; \ + + for (; rounds; rounds -= 2) { + quarter( x0, x4, x8,x12) + quarter( x5, x9,x13, x1) + quarter(x10,x14, x2, x6) + quarter(x15, x3, x7,x11) + quarter( x0, x1, x2, x3) + quarter( x5, x6, x7, x4) + quarter(x10,x11, x8, x9) + quarter(x15,x12,x13,x14) + } + + state[0] += x0; + state[1] += x1; + state[2] += x2; + state[3] += x3; + state[4] += x4; + state[5] += x5; + state[6] += x6; + state[7] += x7; + state[8] += x8; + state[9] += x9; + state[10] += x10; + state[11] += x11; + state[12] += x12; + state[13] += x13; + state[14] += x14; + state[15] += x15; + + #undef quarter +} + +#endif + diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-avx.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-avx.h new file mode 100644 index 0000000..c6e41dc --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-avx.h @@ -0,0 +1,367 @@ +/* x64 */ +#if 
defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA64_AVX + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx) + a1(push rbp) + a2(mov rbp, rsp) + a2(and rsp, ~63) + a2(sub rsp, 128) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,7) + a2(lea r9,[rcx-128]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(vmovdqa xmm0,[rax+0]) + a2(vmovdqa xmm1,[rax+16]) + a2(vmovdqa xmm2,[rax+32]) + a2(vmovdqa xmm3,[rax+48]) + a2(vmovdqa xmm4,[rax+64]) + a2(vmovdqa xmm5,[rax+80]) + a2(vmovdqa xmm6,[rax+96]) + a2(vmovdqa xmm7,[rax+112]) + aj(jz scrypt_ChunkMix_avx_no_xor1) + a3(vpxor xmm0,xmm0,[r9+0]) + a3(vpxor xmm1,xmm1,[r9+16]) + a3(vpxor xmm2,xmm2,[r9+32]) + a3(vpxor xmm3,xmm3,[r9+48]) + a3(vpxor xmm4,xmm4,[r9+64]) + a3(vpxor xmm5,xmm5,[r9+80]) + a3(vpxor xmm6,xmm6,[r9+96]) + a3(vpxor xmm7,xmm7,[r9+112]) + a1(scrypt_ChunkMix_avx_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_avx_loop:) + a2(and rdx, rdx) + a3(vpxor xmm0,xmm0,[rsi+r9+0]) + a3(vpxor xmm1,xmm1,[rsi+r9+16]) + a3(vpxor xmm2,xmm2,[rsi+r9+32]) + a3(vpxor xmm3,xmm3,[rsi+r9+48]) + a3(vpxor xmm4,xmm4,[rsi+r9+64]) + a3(vpxor xmm5,xmm5,[rsi+r9+80]) + a3(vpxor xmm6,xmm6,[rsi+r9+96]) + a3(vpxor xmm7,xmm7,[rsi+r9+112]) + aj(jz scrypt_ChunkMix_avx_no_xor2) + a3(vpxor xmm0,xmm0,[rdx+r9+0]) + a3(vpxor xmm1,xmm1,[rdx+r9+16]) + a3(vpxor xmm2,xmm2,[rdx+r9+32]) + a3(vpxor xmm3,xmm3,[rdx+r9+48]) + a3(vpxor xmm4,xmm4,[rdx+r9+64]) + a3(vpxor xmm5,xmm5,[rdx+r9+80]) + a3(vpxor xmm6,xmm6,[rdx+r9+96]) + a3(vpxor xmm7,xmm7,[rdx+r9+112]) + a1(scrypt_ChunkMix_avx_no_xor2:) + a2(vmovdqa [rsp+0],xmm0) + a2(vmovdqa [rsp+16],xmm1) + a2(vmovdqa [rsp+32],xmm2) + a2(vmovdqa [rsp+48],xmm3) + a2(vmovdqa [rsp+64],xmm4) + a2(vmovdqa [rsp+80],xmm5) + a2(vmovdqa [rsp+96],xmm6) + a2(vmovdqa [rsp+112],xmm7) + a2(mov rax,8) + a1(scrypt_salsa64_avx_loop: ) + a3(vpaddq xmm8, xmm0, xmm2) + a3(vpaddq xmm9, xmm1, xmm3) + a3(vpshufd xmm8, xmm8, 0xb1) + a3(vpshufd xmm9, xmm9, 0xb1) + a3(vpxor xmm6, xmm6, xmm8) + a3(vpxor xmm7, xmm7, xmm9) + a3(vpaddq xmm10, xmm0, xmm6) + a3(vpaddq xmm11, xmm1, xmm7) + a3(vpsrlq xmm8, xmm10, 51) + a3(vpsrlq xmm9, xmm11, 51) + a3(vpsllq xmm10, xmm10, 13) + a3(vpsllq xmm11, xmm11, 13) + a3(vpxor xmm4, xmm4, xmm8) + a3(vpxor xmm5, xmm5, xmm9) + a3(vpxor xmm4, xmm4, xmm10) + a3(vpxor xmm5, xmm5, xmm11) + a3(vpaddq xmm8, xmm6, xmm4) + a3(vpaddq xmm9, xmm7, xmm5) + a3(vpsrlq xmm10, xmm8, 25) + a3(vpsrlq xmm11, xmm9, 25) + a3(vpsllq xmm8, xmm8, 39) + a3(vpsllq xmm9, xmm9, 39) + a3(vpxor xmm2, xmm2, xmm10) + a3(vpxor xmm3, xmm3, xmm11) + a3(vpxor xmm2, xmm2, xmm8) + a3(vpxor xmm3, xmm3, xmm9) + a3(vpaddq xmm10, xmm4, xmm2) + a3(vpaddq xmm11, xmm5, xmm3) + a3(vpshufd xmm10, xmm10, 0xb1) + a3(vpshufd xmm11, xmm11, 0xb1) + a3(vpxor xmm0, xmm0, xmm10) + a3(vpxor xmm1, xmm1, xmm11) + a2(vmovdqa xmm8, xmm2) + a2(vmovdqa xmm9, xmm3) + a4(vpalignr xmm2, xmm6, xmm7, 8) + a4(vpalignr xmm3, xmm7, xmm6, 8) + a4(vpalignr xmm6, xmm9, xmm8, 8) + a4(vpalignr xmm7, xmm8, xmm9, 8) + a3(vpaddq xmm10, xmm0, xmm2) + a3(vpaddq xmm11, xmm1, xmm3) + a3(vpshufd xmm10, xmm10, 0xb1) + a3(vpshufd xmm11, xmm11, 0xb1) + a3(vpxor xmm6, xmm6, xmm10) + a3(vpxor xmm7, xmm7, xmm11) + a3(vpaddq xmm8, xmm0, xmm6) + a3(vpaddq xmm9, xmm1, xmm7) + a3(vpsrlq 
xmm10, xmm8, 51) + a3(vpsrlq xmm11, xmm9, 51) + a3(vpsllq xmm8, xmm8, 13) + a3(vpsllq xmm9, xmm9, 13) + a3(vpxor xmm5, xmm5, xmm10) + a3(vpxor xmm4, xmm4, xmm11) + a3(vpxor xmm5, xmm5, xmm8) + a3(vpxor xmm4, xmm4, xmm9) + a3(vpaddq xmm10, xmm6, xmm5) + a3(vpaddq xmm11, xmm7, xmm4) + a3(vpsrlq xmm8, xmm10, 25) + a3(vpsrlq xmm9, xmm11, 25) + a3(vpsllq xmm10, xmm10, 39) + a3(vpsllq xmm11, xmm11, 39) + a3(vpxor xmm2, xmm2, xmm8) + a3(vpxor xmm3, xmm3, xmm9) + a3(vpxor xmm2, xmm2, xmm10) + a3(vpxor xmm3, xmm3, xmm11) + a3(vpaddq xmm8, xmm5, xmm2) + a3(vpaddq xmm9, xmm4, xmm3) + a3(vpshufd xmm8, xmm8, 0xb1) + a3(vpshufd xmm9, xmm9, 0xb1) + a3(vpxor xmm0, xmm0, xmm8) + a3(vpxor xmm1, xmm1, xmm9) + a2(vmovdqa xmm10, xmm2) + a2(vmovdqa xmm11, xmm3) + a4(vpalignr xmm2, xmm6, xmm7, 8) + a4(vpalignr xmm3, xmm7, xmm6, 8) + a4(vpalignr xmm6, xmm11, xmm10, 8) + a4(vpalignr xmm7, xmm10, xmm11, 8) + a2(sub rax, 2) + aj(ja scrypt_salsa64_avx_loop) + a3(vpaddq xmm0,xmm0,[rsp+0]) + a3(vpaddq xmm1,xmm1,[rsp+16]) + a3(vpaddq xmm2,xmm2,[rsp+32]) + a3(vpaddq xmm3,xmm3,[rsp+48]) + a3(vpaddq xmm4,xmm4,[rsp+64]) + a3(vpaddq xmm5,xmm5,[rsp+80]) + a3(vpaddq xmm6,xmm6,[rsp+96]) + a3(vpaddq xmm7,xmm7,[rsp+112]) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0xff) + a2(add r9,128) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(vmovdqa [rax+0],xmm0) + a2(vmovdqa [rax+16],xmm1) + a2(vmovdqa [rax+32],xmm2) + a2(vmovdqa [rax+48],xmm3) + a2(vmovdqa [rax+64],xmm4) + a2(vmovdqa [rax+80],xmm5) + a2(vmovdqa [rax+96],xmm6) + a2(vmovdqa [rax+112],xmm7) + aj(jne scrypt_ChunkMix_avx_loop) + a2(mov rsp, rbp) + a1(pop rbp) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_avx) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) + +#define SCRYPT_SALSA64_AVX + +static void asm_calling_convention +scrypt_ChunkMix_avx(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + x4 = xmmp[4]; + x5 = xmmp[5]; + x6 = xmmp[6]; + x7 = xmmp[7]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + t4 = x4; + t5 = 
x5; + t6 = x6; + t7 = x7; + + for (rounds = 8; rounds; rounds -= 2) { + z0 = _mm_add_epi64(x0, x2); + z1 = _mm_add_epi64(x1, x3); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x6 = _mm_xor_si128(x6, z0); + x7 = _mm_xor_si128(x7, z1); + + z0 = _mm_add_epi64(x6, x0); + z1 = _mm_add_epi64(x7, x1); + z2 = _mm_srli_epi64(z0, 64-13); + z3 = _mm_srli_epi64(z1, 64-13); + z0 = _mm_slli_epi64(z0, 13); + z1 = _mm_slli_epi64(z1, 13); + x4 = _mm_xor_si128(x4, z2); + x5 = _mm_xor_si128(x5, z3); + x4 = _mm_xor_si128(x4, z0); + x5 = _mm_xor_si128(x5, z1); + + z0 = _mm_add_epi64(x4, x6); + z1 = _mm_add_epi64(x5, x7); + z2 = _mm_srli_epi64(z0, 64-39); + z3 = _mm_srli_epi64(z1, 64-39); + z0 = _mm_slli_epi64(z0, 39); + z1 = _mm_slli_epi64(z1, 39); + x2 = _mm_xor_si128(x2, z2); + x3 = _mm_xor_si128(x3, z3); + x2 = _mm_xor_si128(x2, z0); + x3 = _mm_xor_si128(x3, z1); + + z0 = _mm_add_epi64(x2, x4); + z1 = _mm_add_epi64(x3, x5); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x0 = _mm_xor_si128(x0, z0); + x1 = _mm_xor_si128(x1, z1); + + z0 = x2; + z1 = x3; + x2 = _mm_alignr_epi8(x6, x7, 8); + x3 = _mm_alignr_epi8(x7, x6, 8); + x6 = _mm_alignr_epi8(z1, z0, 8); + x7 = _mm_alignr_epi8(z0, z1, 8); + + z0 = _mm_add_epi64(x0, x2); + z1 = _mm_add_epi64(x1, x3); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x6 = _mm_xor_si128(x6, z0); + x7 = _mm_xor_si128(x7, z1); + + z0 = _mm_add_epi64(x6, x0); + z1 = _mm_add_epi64(x7, x1); + z2 = _mm_srli_epi64(z0, 64-13); + z3 = _mm_srli_epi64(z1, 64-13); + z0 = _mm_slli_epi64(z0, 13); + z1 = _mm_slli_epi64(z1, 13); + x5 = _mm_xor_si128(x5, z2); + x4 = _mm_xor_si128(x4, z3); + x5 = _mm_xor_si128(x5, z0); + x4 = _mm_xor_si128(x4, z1); + + z0 = _mm_add_epi64(x5, x6); + z1 = _mm_add_epi64(x4, x7); + z2 = _mm_srli_epi64(z0, 64-39); + z3 = _mm_srli_epi64(z1, 64-39); + z0 = _mm_slli_epi64(z0, 39); + z1 = _mm_slli_epi64(z1, 39); + x2 = _mm_xor_si128(x2, z2); + x3 = _mm_xor_si128(x3, z3); + x2 = _mm_xor_si128(x2, z0); + x3 = _mm_xor_si128(x3, z1); + + z0 = _mm_add_epi64(x2, x5); + z1 = _mm_add_epi64(x3, x4); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x0 = _mm_xor_si128(x0, z0); + x1 = _mm_xor_si128(x1, z1); + + z0 = x2; + z1 = x3; + x2 = _mm_alignr_epi8(x6, x7, 8); + x3 = _mm_alignr_epi8(x7, x6, 8); + x6 = _mm_alignr_epi8(z1, z0, 8); + x7 = _mm_alignr_epi8(z0, z1, 8); + } + + x0 = _mm_add_epi64(x0, t0); + x1 = _mm_add_epi64(x1, t1); + x2 = _mm_add_epi64(x2, t2); + x3 = _mm_add_epi64(x3, t3); + x4 = _mm_add_epi64(x4, t4); + x5 = _mm_add_epi64(x5, t5); + x6 = _mm_add_epi64(x6, t6); + x7 = _mm_add_epi64(x7, t7); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + xmmp[4] = x4; + xmmp[5] = x5; + xmmp[6] = x6; + xmmp[7] = x7; + } +} + +#endif + +#if defined(SCRYPT_SALSA64_AVX) + /* uses salsa64_core_tangle_sse2 */ + + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa64/8-AVX" + #undef SCRYPT_SALSA64_INCLUDED + #define SCRYPT_SALSA64_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-avx2.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-avx2.h new file mode 100644 index 0000000..a42e808 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-avx2.h @@ -0,0 +1,221 @@ +/* x64 */ +#if 
defined(X86_64ASM_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA64_AVX2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx2) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,7) + a2(lea r9,[rcx-128]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(vmovdqa ymm0,[rax+0]) + a2(vmovdqa ymm1,[rax+32]) + a2(vmovdqa ymm2,[rax+64]) + a2(vmovdqa ymm3,[rax+96]) + aj(jz scrypt_ChunkMix_avx2_no_xor1) + a3(vpxor ymm0,ymm0,[r9+0]) + a3(vpxor ymm1,ymm1,[r9+32]) + a3(vpxor ymm2,ymm2,[r9+64]) + a3(vpxor ymm3,ymm3,[r9+96]) + a1(scrypt_ChunkMix_avx2_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_avx2_loop:) + a2(and rdx, rdx) + a3(vpxor ymm0,ymm0,[rsi+r9+0]) + a3(vpxor ymm1,ymm1,[rsi+r9+32]) + a3(vpxor ymm2,ymm2,[rsi+r9+64]) + a3(vpxor ymm3,ymm3,[rsi+r9+96]) + aj(jz scrypt_ChunkMix_avx2_no_xor2) + a3(vpxor ymm0,ymm0,[rdx+r9+0]) + a3(vpxor ymm1,ymm1,[rdx+r9+32]) + a3(vpxor ymm2,ymm2,[rdx+r9+64]) + a3(vpxor ymm3,ymm3,[rdx+r9+96]) + a1(scrypt_ChunkMix_avx2_no_xor2:) + a2(vmovdqa ymm6,ymm0) + a2(vmovdqa ymm7,ymm1) + a2(vmovdqa ymm8,ymm2) + a2(vmovdqa ymm9,ymm3) + a2(mov rax,4) + a1(scrypt_salsa64_avx2_loop: ) + a3(vpaddq ymm4, ymm1, ymm0) + a3(vpshufd ymm4, ymm4, 0xb1) + a3(vpxor ymm3, ymm3, ymm4) + a3(vpaddq ymm4, ymm0, ymm3) + a3(vpsrlq ymm5, ymm4, 51) + a3(vpxor ymm2, ymm2, ymm5) + a3(vpsllq ymm4, ymm4, 13) + a3(vpxor ymm2, ymm2, ymm4) + a3(vpaddq ymm4, ymm3, ymm2) + a3(vpsrlq ymm5, ymm4, 25) + a3(vpxor ymm1, ymm1, ymm5) + a3(vpsllq ymm4, ymm4, 39) + a3(vpxor ymm1, ymm1, ymm4) + a3(vpaddq ymm4, ymm2, ymm1) + a3(vpshufd ymm4, ymm4, 0xb1) + a3(vpermq ymm1, ymm1, 0x39) + a3(vpermq ymm10, ymm2, 0x4e) + a3(vpxor ymm0, ymm0, ymm4) + a3(vpermq ymm3, ymm3, 0x93) + a3(vpaddq ymm4, ymm3, ymm0) + a3(vpshufd ymm4, ymm4, 0xb1) + a3(vpxor ymm1, ymm1, ymm4) + a3(vpaddq ymm4, ymm0, ymm1) + a3(vpsrlq ymm5, ymm4, 51) + a3(vpxor ymm10, ymm10, ymm5) + a3(vpsllq ymm4, ymm4, 13) + a3(vpxor ymm10, ymm10, ymm4) + a3(vpaddq ymm4, ymm1, ymm10) + a3(vpsrlq ymm5, ymm4, 25) + a3(vpxor ymm3, ymm3, ymm5) + a3(vpsllq ymm4, ymm4, 39) + a3(vpermq ymm1, ymm1, 0x93) + a3(vpxor ymm3, ymm3, ymm4) + a3(vpermq ymm2, ymm10, 0x4e) + a3(vpaddq ymm4, ymm10, ymm3) + a3(vpshufd ymm4, ymm4, 0xb1) + a3(vpermq ymm3, ymm3, 0x39) + a3(vpxor ymm0, ymm0, ymm4) + a1(dec rax) + aj(jnz scrypt_salsa64_avx2_loop) + a3(vpaddq ymm0,ymm0,ymm6) + a3(vpaddq ymm1,ymm1,ymm7) + a3(vpaddq ymm2,ymm2,ymm8) + a3(vpaddq ymm3,ymm3,ymm9) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0xff) + a2(add r9,128) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(vmovdqa [rax+0],ymm0) + a2(vmovdqa [rax+32],ymm1) + a2(vmovdqa [rax+64],ymm2) + a2(vmovdqa [rax+96],ymm3) + aj(jne scrypt_ChunkMix_avx2_loop) + a1(vzeroupper) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_avx2) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) + +#define SCRYPT_SALSA64_AVX2 + +static void asm_calling_convention +scrypt_ChunkMix_avx2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + ymmi *ymmp,y0,y1,y2,y3,t0,t1,t2,t3,z0,z1; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + 
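	/* each 64-bit-Salsa block is 128 bytes, i.e. four 256-bit ymm words here; in the loop
	   below `half` toggles between 0 and r (half ^= r), so the store index (i / 2) + half
	   interleaves the results: even-numbered blocks land in B'[0..r-1] and odd-numbered
	   blocks in B'[r..2r-1] (steps 4 and 6 of BlockMix) */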
ymmp = (ymmi *)scrypt_block(Bin, blocksPerChunk - 1); + y0 = ymmp[0]; + y1 = ymmp[1]; + y2 = ymmp[2]; + y3 = ymmp[3]; + + if (Bxor) { + ymmp = (ymmi *)scrypt_block(Bxor, blocksPerChunk - 1); + y0 = _mm256_xor_si256(y0, ymmp[0]); + y1 = _mm256_xor_si256(y1, ymmp[1]); + y2 = _mm256_xor_si256(y2, ymmp[2]); + y3 = _mm256_xor_si256(y3, ymmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + ymmp = (ymmi *)scrypt_block(Bin, i); + y0 = _mm256_xor_si256(y0, ymmp[0]); + y1 = _mm256_xor_si256(y1, ymmp[1]); + y2 = _mm256_xor_si256(y2, ymmp[2]); + y3 = _mm256_xor_si256(y3, ymmp[3]); + + if (Bxor) { + ymmp = (ymmi *)scrypt_block(Bxor, i); + y0 = _mm256_xor_si256(y0, ymmp[0]); + y1 = _mm256_xor_si256(y1, ymmp[1]); + y2 = _mm256_xor_si256(y2, ymmp[2]); + y3 = _mm256_xor_si256(y3, ymmp[3]); + } + + t0 = y0; + t1 = y1; + t2 = y2; + t3 = y3; + + for (rounds = 8; rounds; rounds -= 2) { + z0 = _mm256_add_epi64(y0, y1); + z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + y3 = _mm256_xor_si256(y3, z0); + z0 = _mm256_add_epi64(y3, y0); + z1 = _mm256_srli_epi64(z0, 64-13); + y2 = _mm256_xor_si256(y2, z1); + z0 = _mm256_slli_epi64(z0, 13); + y2 = _mm256_xor_si256(y2, z0); + z0 = _mm256_add_epi64(y2, y3); + z1 = _mm256_srli_epi64(z0, 64-39); + y1 = _mm256_xor_si256(y1, z1); + z0 = _mm256_slli_epi64(z0, 39); + y1 = _mm256_xor_si256(y1, z0); + y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(0,3,2,1)); + y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2)); + y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(2,1,0,3)); + z0 = _mm256_add_epi64(y1, y2); + z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + y0 = _mm256_xor_si256(y0, z0); + z0 = _mm256_add_epi64(y0, y3); + z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + y1 = _mm256_xor_si256(y1, z0); + z0 = _mm256_add_epi64(y1, y0); + z1 = _mm256_srli_epi64(z0, 64-13); + y2 = _mm256_xor_si256(y2, z1); + z0 = _mm256_slli_epi64(z0, 13); + y2 = _mm256_xor_si256(y2, z0); + z0 = _mm256_add_epi64(y2, y1); + z1 = _mm256_srli_epi64(z0, 64-39); + y3 = _mm256_xor_si256(y3, z1); + z0 = _mm256_slli_epi64(z0, 39); + y3 = _mm256_xor_si256(y3, z0); + z0 = _mm256_add_epi64(y3, y2); + z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + y0 = _mm256_xor_si256(y0, z0); + y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(2,1,0,3)); + y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2)); + y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(0,3,2,1)); + } + + y0 = _mm256_add_epi64(y0, t0); + y1 = _mm256_add_epi64(y1, t1); + y2 = _mm256_add_epi64(y2, t2); + y3 = _mm256_add_epi64(y3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + ymmp = (ymmi *)scrypt_block(Bout, (i / 2) + half); + ymmp[0] = y0; + ymmp[1] = y1; + ymmp[2] = y2; + ymmp[3] = y3; + } +} + +#endif + +#if defined(SCRYPT_SALSA64_AVX2) + /* uses salsa64_core_tangle_sse2 */ + + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa64/8-AVX2" + #undef SCRYPT_SALSA64_INCLUDED + #define SCRYPT_SALSA64_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-sse2.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-sse2.h new file mode 100644 index 0000000..971d98a --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-sse2.h @@ -0,0 +1,449 @@ +/* x64 */ +#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA64_SSE2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint64_t *Bout/*[chunkBytes]*/, 
uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_sse2) + a1(push rbp) + a2(mov rbp, rsp) + a2(and rsp, ~63) + a2(sub rsp, 128) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,7) + a2(lea r9,[rcx-128]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(movdqa xmm0,[rax+0]) + a2(movdqa xmm1,[rax+16]) + a2(movdqa xmm2,[rax+32]) + a2(movdqa xmm3,[rax+48]) + a2(movdqa xmm4,[rax+64]) + a2(movdqa xmm5,[rax+80]) + a2(movdqa xmm6,[rax+96]) + a2(movdqa xmm7,[rax+112]) + aj(jz scrypt_ChunkMix_sse2_no_xor1) + a2(pxor xmm0,[r9+0]) + a2(pxor xmm1,[r9+16]) + a2(pxor xmm2,[r9+32]) + a2(pxor xmm3,[r9+48]) + a2(pxor xmm4,[r9+64]) + a2(pxor xmm5,[r9+80]) + a2(pxor xmm6,[r9+96]) + a2(pxor xmm7,[r9+112]) + a1(scrypt_ChunkMix_sse2_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_sse2_loop:) + a2(and rdx, rdx) + a2(pxor xmm0,[rsi+r9+0]) + a2(pxor xmm1,[rsi+r9+16]) + a2(pxor xmm2,[rsi+r9+32]) + a2(pxor xmm3,[rsi+r9+48]) + a2(pxor xmm4,[rsi+r9+64]) + a2(pxor xmm5,[rsi+r9+80]) + a2(pxor xmm6,[rsi+r9+96]) + a2(pxor xmm7,[rsi+r9+112]) + aj(jz scrypt_ChunkMix_sse2_no_xor2) + a2(pxor xmm0,[rdx+r9+0]) + a2(pxor xmm1,[rdx+r9+16]) + a2(pxor xmm2,[rdx+r9+32]) + a2(pxor xmm3,[rdx+r9+48]) + a2(pxor xmm4,[rdx+r9+64]) + a2(pxor xmm5,[rdx+r9+80]) + a2(pxor xmm6,[rdx+r9+96]) + a2(pxor xmm7,[rdx+r9+112]) + a1(scrypt_ChunkMix_sse2_no_xor2:) + a2(movdqa [rsp+0],xmm0) + a2(movdqa [rsp+16],xmm1) + a2(movdqa [rsp+32],xmm2) + a2(movdqa [rsp+48],xmm3) + a2(movdqa [rsp+64],xmm4) + a2(movdqa [rsp+80],xmm5) + a2(movdqa [rsp+96],xmm6) + a2(movdqa [rsp+112],xmm7) + a2(mov rax,8) + a1(scrypt_salsa64_sse2_loop: ) + a2(movdqa xmm8, xmm0) + a2(movdqa xmm9, xmm1) + a2(paddq xmm8, xmm2) + a2(paddq xmm9, xmm3) + a3(pshufd xmm8, xmm8, 0xb1) + a3(pshufd xmm9, xmm9, 0xb1) + a2(pxor xmm6, xmm8) + a2(pxor xmm7, xmm9) + a2(movdqa xmm10, xmm0) + a2(movdqa xmm11, xmm1) + a2(paddq xmm10, xmm6) + a2(paddq xmm11, xmm7) + a2(movdqa xmm8, xmm10) + a2(movdqa xmm9, xmm11) + a2(psrlq xmm10, 51) + a2(psrlq xmm11, 51) + a2(psllq xmm8, 13) + a2(psllq xmm9, 13) + a2(pxor xmm4, xmm10) + a2(pxor xmm5, xmm11) + a2(pxor xmm4, xmm8) + a2(pxor xmm5, xmm9) + a2(movdqa xmm10, xmm6) + a2(movdqa xmm11, xmm7) + a2(paddq xmm10, xmm4) + a2(paddq xmm11, xmm5) + a2(movdqa xmm8, xmm10) + a2(movdqa xmm9, xmm11) + a2(psrlq xmm10, 25) + a2(psrlq xmm11, 25) + a2(psllq xmm8, 39) + a2(psllq xmm9, 39) + a2(pxor xmm2, xmm10) + a2(pxor xmm3, xmm11) + a2(pxor xmm2, xmm8) + a2(pxor xmm3, xmm9) + a2(movdqa xmm8, xmm4) + a2(movdqa xmm9, xmm5) + a2(paddq xmm8, xmm2) + a2(paddq xmm9, xmm3) + a3(pshufd xmm8, xmm8, 0xb1) + a3(pshufd xmm9, xmm9, 0xb1) + a2(pxor xmm0, xmm8) + a2(pxor xmm1, xmm9) + a2(movdqa xmm8, xmm2) + a2(movdqa xmm9, xmm3) + a2(movdqa xmm10, xmm6) + a2(movdqa xmm11, xmm7) + a2(movdqa xmm2, xmm7) + a2(movdqa xmm3, xmm6) + a2(punpcklqdq xmm10, xmm6) + a2(punpcklqdq xmm11, xmm7) + a2(movdqa xmm6, xmm8) + a2(movdqa xmm7, xmm9) + a2(punpcklqdq xmm9, xmm9) + a2(punpcklqdq xmm8, xmm8) + a2(punpckhqdq xmm2, xmm10) + a2(punpckhqdq xmm3, xmm11) + a2(punpckhqdq xmm6, xmm9) + a2(punpckhqdq xmm7, xmm8) + a2(sub rax, 2) + a2(movdqa xmm8, xmm0) + a2(movdqa xmm9, xmm1) + a2(paddq xmm8, xmm2) + a2(paddq xmm9, xmm3) + a3(pshufd xmm8, xmm8, 0xb1) + a3(pshufd xmm9, xmm9, 0xb1) + a2(pxor xmm6, xmm8) + a2(pxor xmm7, xmm9) + a2(movdqa xmm10, xmm0) + a2(movdqa xmm11, xmm1) + a2(paddq xmm10, xmm6) + a2(paddq xmm11, xmm7) + a2(movdqa xmm8, xmm10) + 
a2(movdqa xmm9, xmm11) + a2(psrlq xmm10, 51) + a2(psrlq xmm11, 51) + a2(psllq xmm8, 13) + a2(psllq xmm9, 13) + a2(pxor xmm5, xmm10) + a2(pxor xmm4, xmm11) + a2(pxor xmm5, xmm8) + a2(pxor xmm4, xmm9) + a2(movdqa xmm10, xmm6) + a2(movdqa xmm11, xmm7) + a2(paddq xmm10, xmm5) + a2(paddq xmm11, xmm4) + a2(movdqa xmm8, xmm10) + a2(movdqa xmm9, xmm11) + a2(psrlq xmm10, 25) + a2(psrlq xmm11, 25) + a2(psllq xmm8, 39) + a2(psllq xmm9, 39) + a2(pxor xmm2, xmm10) + a2(pxor xmm3, xmm11) + a2(pxor xmm2, xmm8) + a2(pxor xmm3, xmm9) + a2(movdqa xmm8, xmm5) + a2(movdqa xmm9, xmm4) + a2(paddq xmm8, xmm2) + a2(paddq xmm9, xmm3) + a3(pshufd xmm8, xmm8, 0xb1) + a3(pshufd xmm9, xmm9, 0xb1) + a2(pxor xmm0, xmm8) + a2(pxor xmm1, xmm9) + a2(movdqa xmm8, xmm2) + a2(movdqa xmm9, xmm3) + a2(movdqa xmm10, xmm6) + a2(movdqa xmm11, xmm7) + a2(movdqa xmm2, xmm7) + a2(movdqa xmm3, xmm6) + a2(punpcklqdq xmm10, xmm6) + a2(punpcklqdq xmm11, xmm7) + a2(movdqa xmm6, xmm8) + a2(movdqa xmm7, xmm9) + a2(punpcklqdq xmm9, xmm9) + a2(punpcklqdq xmm8, xmm8) + a2(punpckhqdq xmm2, xmm10) + a2(punpckhqdq xmm3, xmm11) + a2(punpckhqdq xmm6, xmm9) + a2(punpckhqdq xmm7, xmm8) + aj(ja scrypt_salsa64_sse2_loop) + a2(paddq xmm0,[rsp+0]) + a2(paddq xmm1,[rsp+16]) + a2(paddq xmm2,[rsp+32]) + a2(paddq xmm3,[rsp+48]) + a2(paddq xmm4,[rsp+64]) + a2(paddq xmm5,[rsp+80]) + a2(paddq xmm6,[rsp+96]) + a2(paddq xmm7,[rsp+112]) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0xff) + a2(add r9,128) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(movdqa [rax+0],xmm0) + a2(movdqa [rax+16],xmm1) + a2(movdqa [rax+32],xmm2) + a2(movdqa [rax+48],xmm3) + a2(movdqa [rax+64],xmm4) + a2(movdqa [rax+80],xmm5) + a2(movdqa [rax+96],xmm6) + a2(movdqa [rax+112],xmm7) + aj(jne scrypt_ChunkMix_sse2_loop) + a2(mov rsp, rbp) + a1(pop rbp) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_sse2) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) + +#define SCRYPT_SALSA64_SSE2 + +static void asm_calling_convention +scrypt_ChunkMix_sse2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + x4 = xmmp[4]; + x5 = xmmp[5]; + x6 = xmmp[6]; + x7 = xmmp[7]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, 
xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + t4 = x4; + t5 = x5; + t6 = x6; + t7 = x7; + + for (rounds = 8; rounds; rounds -= 2) { + z0 = _mm_add_epi64(x0, x2); + z1 = _mm_add_epi64(x1, x3); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x6 = _mm_xor_si128(x6, z0); + x7 = _mm_xor_si128(x7, z1); + + z0 = _mm_add_epi64(x6, x0); + z1 = _mm_add_epi64(x7, x1); + z2 = _mm_srli_epi64(z0, 64-13); + z3 = _mm_srli_epi64(z1, 64-13); + z0 = _mm_slli_epi64(z0, 13); + z1 = _mm_slli_epi64(z1, 13); + x4 = _mm_xor_si128(x4, z2); + x5 = _mm_xor_si128(x5, z3); + x4 = _mm_xor_si128(x4, z0); + x5 = _mm_xor_si128(x5, z1); + + z0 = _mm_add_epi64(x4, x6); + z1 = _mm_add_epi64(x5, x7); + z2 = _mm_srli_epi64(z0, 64-39); + z3 = _mm_srli_epi64(z1, 64-39); + z0 = _mm_slli_epi64(z0, 39); + z1 = _mm_slli_epi64(z1, 39); + x2 = _mm_xor_si128(x2, z2); + x3 = _mm_xor_si128(x3, z3); + x2 = _mm_xor_si128(x2, z0); + x3 = _mm_xor_si128(x3, z1); + + z0 = _mm_add_epi64(x2, x4); + z1 = _mm_add_epi64(x3, x5); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x0 = _mm_xor_si128(x0, z0); + x1 = _mm_xor_si128(x1, z1); + + z0 = x4; + z1 = x5; + z2 = x2; + z3 = x3; + x4 = z1; + x5 = z0; + x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6)); + x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7)); + x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3)); + x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2)); + + z0 = _mm_add_epi64(x0, x2); + z1 = _mm_add_epi64(x1, x3); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x6 = _mm_xor_si128(x6, z0); + x7 = _mm_xor_si128(x7, z1); + + z0 = _mm_add_epi64(x6, x0); + z1 = _mm_add_epi64(x7, x1); + z2 = _mm_srli_epi64(z0, 64-13); + z3 = _mm_srli_epi64(z1, 64-13); + z0 = _mm_slli_epi64(z0, 13); + z1 = _mm_slli_epi64(z1, 13); + x4 = _mm_xor_si128(x4, z2); + x5 = _mm_xor_si128(x5, z3); + x4 = _mm_xor_si128(x4, z0); + x5 = _mm_xor_si128(x5, z1); + + z0 = _mm_add_epi64(x4, x6); + z1 = _mm_add_epi64(x5, x7); + z2 = _mm_srli_epi64(z0, 64-39); + z3 = _mm_srli_epi64(z1, 64-39); + z0 = _mm_slli_epi64(z0, 39); + z1 = _mm_slli_epi64(z1, 39); + x2 = _mm_xor_si128(x2, z2); + x3 = _mm_xor_si128(x3, z3); + x2 = _mm_xor_si128(x2, z0); + x3 = _mm_xor_si128(x3, z1); + + z0 = _mm_add_epi64(x2, x4); + z1 = _mm_add_epi64(x3, x5); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x0 = _mm_xor_si128(x0, z0); + x1 = _mm_xor_si128(x1, z1); + + z0 = x4; + z1 = x5; + z2 = x2; + z3 = x3; + x4 = z1; + x5 = z0; + x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6)); + x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7)); + x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3)); + x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2)); + } + + x0 = _mm_add_epi64(x0, t0); + x1 = _mm_add_epi64(x1, t1); + x2 = _mm_add_epi64(x2, t2); + x3 = _mm_add_epi64(x3, t3); + x4 = _mm_add_epi64(x4, t4); + x5 = _mm_add_epi64(x5, t5); + x6 = _mm_add_epi64(x6, t6); + x7 = _mm_add_epi64(x7, t7); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + xmmp[4] = x4; + xmmp[5] = x5; + xmmp[6] = x6; + xmmp[7] = x7; + } +} + +#endif + +#if 
defined(SCRYPT_SALSA64_SSE2) + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa64/8-SSE2" + #undef SCRYPT_SALSA64_INCLUDED + #define SCRYPT_SALSA64_INCLUDED +#endif + +/* sse3/avx use this as well */ +#if defined(SCRYPT_SALSA64_INCLUDED) + /* + Default layout: + 0 1 2 3 + 4 5 6 7 + 8 9 10 11 + 12 13 14 15 + + SSE2 layout: + 0 5 10 15 + 12 1 6 11 + 8 13 2 7 + 4 9 14 3 + */ + + + static void asm_calling_convention + salsa64_core_tangle_sse2(uint64_t *blocks, size_t count) { + uint64_t t; + while (count--) { + t = blocks[1]; blocks[1] = blocks[5]; blocks[5] = t; + t = blocks[2]; blocks[2] = blocks[10]; blocks[10] = t; + t = blocks[3]; blocks[3] = blocks[15]; blocks[15] = t; + t = blocks[4]; blocks[4] = blocks[12]; blocks[12] = t; + t = blocks[7]; blocks[7] = blocks[11]; blocks[11] = t; + t = blocks[9]; blocks[9] = blocks[13]; blocks[13] = t; + blocks += 16; + } + } +#endif \ No newline at end of file diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-ssse3.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-ssse3.h new file mode 100644 index 0000000..d184128 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-ssse3.h @@ -0,0 +1,399 @@ +/* x64 */ +#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA64_SSSE3 + +asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_ssse3) + a1(push rbp) + a2(mov rbp, rsp) + a2(and rsp, ~63) + a2(sub rsp, 128) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,7) + a2(lea r9,[rcx-128]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(movdqa xmm0,[rax+0]) + a2(movdqa xmm1,[rax+16]) + a2(movdqa xmm2,[rax+32]) + a2(movdqa xmm3,[rax+48]) + a2(movdqa xmm4,[rax+64]) + a2(movdqa xmm5,[rax+80]) + a2(movdqa xmm6,[rax+96]) + a2(movdqa xmm7,[rax+112]) + aj(jz scrypt_ChunkMix_ssse3_no_xor1) + a2(pxor xmm0,[r9+0]) + a2(pxor xmm1,[r9+16]) + a2(pxor xmm2,[r9+32]) + a2(pxor xmm3,[r9+48]) + a2(pxor xmm4,[r9+64]) + a2(pxor xmm5,[r9+80]) + a2(pxor xmm6,[r9+96]) + a2(pxor xmm7,[r9+112]) + a1(scrypt_ChunkMix_ssse3_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_ssse3_loop:) + a2(and rdx, rdx) + a2(pxor xmm0,[rsi+r9+0]) + a2(pxor xmm1,[rsi+r9+16]) + a2(pxor xmm2,[rsi+r9+32]) + a2(pxor xmm3,[rsi+r9+48]) + a2(pxor xmm4,[rsi+r9+64]) + a2(pxor xmm5,[rsi+r9+80]) + a2(pxor xmm6,[rsi+r9+96]) + a2(pxor xmm7,[rsi+r9+112]) + aj(jz scrypt_ChunkMix_ssse3_no_xor2) + a2(pxor xmm0,[rdx+r9+0]) + a2(pxor xmm1,[rdx+r9+16]) + a2(pxor xmm2,[rdx+r9+32]) + a2(pxor xmm3,[rdx+r9+48]) + a2(pxor xmm4,[rdx+r9+64]) + a2(pxor xmm5,[rdx+r9+80]) + a2(pxor xmm6,[rdx+r9+96]) + a2(pxor xmm7,[rdx+r9+112]) + a1(scrypt_ChunkMix_ssse3_no_xor2:) + a2(movdqa [rsp+0],xmm0) + a2(movdqa [rsp+16],xmm1) + a2(movdqa [rsp+32],xmm2) + a2(movdqa [rsp+48],xmm3) + a2(movdqa [rsp+64],xmm4) + a2(movdqa [rsp+80],xmm5) + a2(movdqa [rsp+96],xmm6) + a2(movdqa [rsp+112],xmm7) + a2(mov rax,8) + a1(scrypt_salsa64_ssse3_loop: ) + a2(movdqa xmm8, xmm0) + a2(movdqa xmm9, xmm1) + a2(paddq xmm8, xmm2) + a2(paddq xmm9, xmm3) + a3(pshufd xmm8, xmm8, 0xb1) + a3(pshufd xmm9, xmm9, 0xb1) + a2(pxor xmm6, xmm8) + a2(pxor xmm7, xmm9) + a2(movdqa xmm10, xmm0) + a2(movdqa xmm11, xmm1) + a2(paddq xmm10, xmm6) + a2(paddq xmm11, xmm7) + a2(movdqa xmm8, xmm10) + a2(movdqa xmm9, xmm11) + a2(psrlq xmm10, 51) + 
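	/* SSE2/SSSE3 have no 64-bit rotate, so each psrlq-by-(64-n) / psllq-by-n pair xored
	   into the target emulates a rotate-left by n (13 here, 39 further down); the XOP
	   variant performs the same step with vprotq */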
a2(psrlq xmm11, 51) + a2(psllq xmm8, 13) + a2(psllq xmm9, 13) + a2(pxor xmm4, xmm10) + a2(pxor xmm5, xmm11) + a2(pxor xmm4, xmm8) + a2(pxor xmm5, xmm9) + a2(movdqa xmm10, xmm6) + a2(movdqa xmm11, xmm7) + a2(paddq xmm10, xmm4) + a2(paddq xmm11, xmm5) + a2(movdqa xmm8, xmm10) + a2(movdqa xmm9, xmm11) + a2(psrlq xmm10, 25) + a2(psrlq xmm11, 25) + a2(psllq xmm8, 39) + a2(psllq xmm9, 39) + a2(pxor xmm2, xmm10) + a2(pxor xmm3, xmm11) + a2(pxor xmm2, xmm8) + a2(pxor xmm3, xmm9) + a2(movdqa xmm8, xmm4) + a2(movdqa xmm9, xmm5) + a2(paddq xmm8, xmm2) + a2(paddq xmm9, xmm3) + a3(pshufd xmm8, xmm8, 0xb1) + a3(pshufd xmm9, xmm9, 0xb1) + a2(pxor xmm0, xmm8) + a2(pxor xmm1, xmm9) + a2(movdqa xmm10, xmm2) + a2(movdqa xmm11, xmm3) + a2(movdqa xmm2, xmm6) + a2(movdqa xmm3, xmm7) + a3(palignr xmm2, xmm7, 8) + a3(palignr xmm3, xmm6, 8) + a2(movdqa xmm6, xmm11) + a2(movdqa xmm7, xmm10) + a3(palignr xmm6, xmm10, 8) + a3(palignr xmm7, xmm11, 8) + a2(sub rax, 2) + a2(movdqa xmm8, xmm0) + a2(movdqa xmm9, xmm1) + a2(paddq xmm8, xmm2) + a2(paddq xmm9, xmm3) + a3(pshufd xmm8, xmm8, 0xb1) + a3(pshufd xmm9, xmm9, 0xb1) + a2(pxor xmm6, xmm8) + a2(pxor xmm7, xmm9) + a2(movdqa xmm10, xmm0) + a2(movdqa xmm11, xmm1) + a2(paddq xmm10, xmm6) + a2(paddq xmm11, xmm7) + a2(movdqa xmm8, xmm10) + a2(movdqa xmm9, xmm11) + a2(psrlq xmm10, 51) + a2(psrlq xmm11, 51) + a2(psllq xmm8, 13) + a2(psllq xmm9, 13) + a2(pxor xmm5, xmm10) + a2(pxor xmm4, xmm11) + a2(pxor xmm5, xmm8) + a2(pxor xmm4, xmm9) + a2(movdqa xmm10, xmm6) + a2(movdqa xmm11, xmm7) + a2(paddq xmm10, xmm5) + a2(paddq xmm11, xmm4) + a2(movdqa xmm8, xmm10) + a2(movdqa xmm9, xmm11) + a2(psrlq xmm10, 25) + a2(psrlq xmm11, 25) + a2(psllq xmm8, 39) + a2(psllq xmm9, 39) + a2(pxor xmm2, xmm10) + a2(pxor xmm3, xmm11) + a2(pxor xmm2, xmm8) + a2(pxor xmm3, xmm9) + a2(movdqa xmm8, xmm5) + a2(movdqa xmm9, xmm4) + a2(paddq xmm8, xmm2) + a2(paddq xmm9, xmm3) + a3(pshufd xmm8, xmm8, 0xb1) + a3(pshufd xmm9, xmm9, 0xb1) + a2(pxor xmm0, xmm8) + a2(pxor xmm1, xmm9) + a2(movdqa xmm10, xmm2) + a2(movdqa xmm11, xmm3) + a2(movdqa xmm2, xmm6) + a2(movdqa xmm3, xmm7) + a3(palignr xmm2, xmm7, 8) + a3(palignr xmm3, xmm6, 8) + a2(movdqa xmm6, xmm11) + a2(movdqa xmm7, xmm10) + a3(palignr xmm6, xmm10, 8) + a3(palignr xmm7, xmm11, 8) + aj(ja scrypt_salsa64_ssse3_loop) + a2(paddq xmm0,[rsp+0]) + a2(paddq xmm1,[rsp+16]) + a2(paddq xmm2,[rsp+32]) + a2(paddq xmm3,[rsp+48]) + a2(paddq xmm4,[rsp+64]) + a2(paddq xmm5,[rsp+80]) + a2(paddq xmm6,[rsp+96]) + a2(paddq xmm7,[rsp+112]) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0xff) + a2(add r9,128) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(movdqa [rax+0],xmm0) + a2(movdqa [rax+16],xmm1) + a2(movdqa [rax+32],xmm2) + a2(movdqa [rax+48],xmm3) + a2(movdqa [rax+64],xmm4) + a2(movdqa [rax+80],xmm5) + a2(movdqa [rax+96],xmm6) + a2(movdqa [rax+112],xmm7) + aj(jne scrypt_ChunkMix_ssse3_loop) + a2(mov rsp, rbp) + a1(pop rbp) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_ssse3) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) + +#define SCRYPT_SALSA64_SSSE3 + +static void asm_calling_convention +scrypt_ChunkMix_ssse3(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + 
x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + x4 = xmmp[4]; + x5 = xmmp[5]; + x6 = xmmp[6]; + x7 = xmmp[7]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + t4 = x4; + t5 = x5; + t6 = x6; + t7 = x7; + + for (rounds = 8; rounds; rounds -= 2) { + z0 = _mm_add_epi64(x0, x2); + z1 = _mm_add_epi64(x1, x3); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x6 = _mm_xor_si128(x6, z0); + x7 = _mm_xor_si128(x7, z1); + + z0 = _mm_add_epi64(x6, x0); + z1 = _mm_add_epi64(x7, x1); + z2 = _mm_srli_epi64(z0, 64-13); + z3 = _mm_srli_epi64(z1, 64-13); + z0 = _mm_slli_epi64(z0, 13); + z1 = _mm_slli_epi64(z1, 13); + x4 = _mm_xor_si128(x4, z2); + x5 = _mm_xor_si128(x5, z3); + x4 = _mm_xor_si128(x4, z0); + x5 = _mm_xor_si128(x5, z1); + + z0 = _mm_add_epi64(x4, x6); + z1 = _mm_add_epi64(x5, x7); + z2 = _mm_srli_epi64(z0, 64-39); + z3 = _mm_srli_epi64(z1, 64-39); + z0 = _mm_slli_epi64(z0, 39); + z1 = _mm_slli_epi64(z1, 39); + x2 = _mm_xor_si128(x2, z2); + x3 = _mm_xor_si128(x3, z3); + x2 = _mm_xor_si128(x2, z0); + x3 = _mm_xor_si128(x3, z1); + + z0 = _mm_add_epi64(x2, x4); + z1 = _mm_add_epi64(x3, x5); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x0 = _mm_xor_si128(x0, z0); + x1 = _mm_xor_si128(x1, z1); + + z0 = x2; + z1 = x3; + x2 = _mm_alignr_epi8(x6, x7, 8); + x3 = _mm_alignr_epi8(x7, x6, 8); + x6 = _mm_alignr_epi8(z1, z0, 8); + x7 = _mm_alignr_epi8(z0, z1, 8); + + z0 = _mm_add_epi64(x0, x2); + z1 = _mm_add_epi64(x1, x3); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x6 = _mm_xor_si128(x6, z0); + x7 = _mm_xor_si128(x7, z1); + + z0 = _mm_add_epi64(x6, x0); + z1 = _mm_add_epi64(x7, x1); + z2 = _mm_srli_epi64(z0, 64-13); + z3 = _mm_srli_epi64(z1, 64-13); + z0 = _mm_slli_epi64(z0, 13); + z1 = _mm_slli_epi64(z1, 13); + x5 = _mm_xor_si128(x5, z2); + x4 = _mm_xor_si128(x4, z3); + x5 = _mm_xor_si128(x5, z0); + x4 = _mm_xor_si128(x4, z1); + + z0 = _mm_add_epi64(x5, x6); + z1 = _mm_add_epi64(x4, x7); + z2 = _mm_srli_epi64(z0, 64-39); + z3 = _mm_srli_epi64(z1, 64-39); + z0 = _mm_slli_epi64(z0, 39); + z1 = _mm_slli_epi64(z1, 39); + x2 = _mm_xor_si128(x2, z2); + x3 = _mm_xor_si128(x3, z3); + x2 = _mm_xor_si128(x2, z0); + x3 = _mm_xor_si128(x3, z1); + + z0 = _mm_add_epi64(x2, x5); + z1 = _mm_add_epi64(x3, x4); + z0 = _mm_shuffle_epi32(z0, 
_MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x0 = _mm_xor_si128(x0, z0); + x1 = _mm_xor_si128(x1, z1); + + z0 = x2; + z1 = x3; + x2 = _mm_alignr_epi8(x6, x7, 8); + x3 = _mm_alignr_epi8(x7, x6, 8); + x6 = _mm_alignr_epi8(z1, z0, 8); + x7 = _mm_alignr_epi8(z0, z1, 8); + } + + x0 = _mm_add_epi64(x0, t0); + x1 = _mm_add_epi64(x1, t1); + x2 = _mm_add_epi64(x2, t2); + x3 = _mm_add_epi64(x3, t3); + x4 = _mm_add_epi64(x4, t4); + x5 = _mm_add_epi64(x5, t5); + x6 = _mm_add_epi64(x6, t6); + x7 = _mm_add_epi64(x7, t7); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + xmmp[4] = x4; + xmmp[5] = x5; + xmmp[6] = x6; + xmmp[7] = x7; + } +} + +#endif + +#if defined(SCRYPT_SALSA64_SSSE3) + /* uses salsa64_core_tangle_sse2 */ + + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa64/8-SSSE3" + #undef SCRYPT_SALSA64_INCLUDED + #define SCRYPT_SALSA64_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-xop.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-xop.h new file mode 100644 index 0000000..d51d112 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64-xop.h @@ -0,0 +1,335 @@ +/* x64 */ +#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS) + +#define SCRYPT_SALSA64_XOP + +asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_xop) + a1(push rbp) + a2(mov rbp, rsp) + a2(and rsp, ~63) + a2(sub rsp, 128) + a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */ + a2(shl rcx,7) + a2(lea r9,[rcx-128]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(vmovdqa xmm0,[rax+0]) + a2(vmovdqa xmm1,[rax+16]) + a2(vmovdqa xmm2,[rax+32]) + a2(vmovdqa xmm3,[rax+48]) + a2(vmovdqa xmm4,[rax+64]) + a2(vmovdqa xmm5,[rax+80]) + a2(vmovdqa xmm6,[rax+96]) + a2(vmovdqa xmm7,[rax+112]) + aj(jz scrypt_ChunkMix_xop_no_xor1) + a3(vpxor xmm0,xmm0,[r9+0]) + a3(vpxor xmm1,xmm1,[r9+16]) + a3(vpxor xmm2,xmm2,[r9+32]) + a3(vpxor xmm3,xmm3,[r9+48]) + a3(vpxor xmm4,xmm4,[r9+64]) + a3(vpxor xmm5,xmm5,[r9+80]) + a3(vpxor xmm6,xmm6,[r9+96]) + a3(vpxor xmm7,xmm7,[r9+112]) + a1(scrypt_ChunkMix_xop_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_xop_loop:) + a2(and rdx, rdx) + a3(vpxor xmm0,xmm0,[rsi+r9+0]) + a3(vpxor xmm1,xmm1,[rsi+r9+16]) + a3(vpxor xmm2,xmm2,[rsi+r9+32]) + a3(vpxor xmm3,xmm3,[rsi+r9+48]) + a3(vpxor xmm4,xmm4,[rsi+r9+64]) + a3(vpxor xmm5,xmm5,[rsi+r9+80]) + a3(vpxor xmm6,xmm6,[rsi+r9+96]) + a3(vpxor xmm7,xmm7,[rsi+r9+112]) + aj(jz scrypt_ChunkMix_xop_no_xor2) + a3(vpxor xmm0,xmm0,[rdx+r9+0]) + a3(vpxor xmm1,xmm1,[rdx+r9+16]) + a3(vpxor xmm2,xmm2,[rdx+r9+32]) + a3(vpxor xmm3,xmm3,[rdx+r9+48]) + a3(vpxor xmm4,xmm4,[rdx+r9+64]) + a3(vpxor xmm5,xmm5,[rdx+r9+80]) + a3(vpxor xmm6,xmm6,[rdx+r9+96]) + a3(vpxor xmm7,xmm7,[rdx+r9+112]) + a1(scrypt_ChunkMix_xop_no_xor2:) + a2(vmovdqa [rsp+0],xmm0) + a2(vmovdqa [rsp+16],xmm1) + a2(vmovdqa [rsp+32],xmm2) + a2(vmovdqa [rsp+48],xmm3) + a2(vmovdqa [rsp+64],xmm4) + a2(vmovdqa [rsp+80],xmm5) + a2(vmovdqa [rsp+96],xmm6) + a2(vmovdqa [rsp+112],xmm7) + a2(mov rax,8) + a1(scrypt_salsa64_xop_loop: ) + a3(vpaddq xmm8, xmm0, xmm2) + a3(vpaddq xmm9, xmm1, xmm3) + a3(vpshufd xmm8, xmm8, 0xb1) + a3(vpshufd xmm9, 
xmm9, 0xb1) + a3(vpxor xmm6, xmm6, xmm8) + a3(vpxor xmm7, xmm7, xmm9) + a3(vpaddq xmm10, xmm0, xmm6) + a3(vpaddq xmm11, xmm1, xmm7) + a3(vprotq xmm10, xmm10, 13) + a3(vprotq xmm11, xmm11, 13) + a3(vpxor xmm4, xmm4, xmm10) + a3(vpxor xmm5, xmm5, xmm11) + a3(vpaddq xmm8, xmm6, xmm4) + a3(vpaddq xmm9, xmm7, xmm5) + a3(vprotq xmm8, xmm8, 39) + a3(vprotq xmm9, xmm9, 39) + a3(vpxor xmm2, xmm2, xmm8) + a3(vpxor xmm3, xmm3, xmm9) + a3(vpaddq xmm10, xmm4, xmm2) + a3(vpaddq xmm11, xmm5, xmm3) + a3(vpshufd xmm10, xmm10, 0xb1) + a3(vpshufd xmm11, xmm11, 0xb1) + a3(vpxor xmm0, xmm0, xmm10) + a3(vpxor xmm1, xmm1, xmm11) + a2(vmovdqa xmm8, xmm2) + a2(vmovdqa xmm9, xmm3) + a4(vpalignr xmm2, xmm6, xmm7, 8) + a4(vpalignr xmm3, xmm7, xmm6, 8) + a4(vpalignr xmm6, xmm9, xmm8, 8) + a4(vpalignr xmm7, xmm8, xmm9, 8) + a3(vpaddq xmm10, xmm0, xmm2) + a3(vpaddq xmm11, xmm1, xmm3) + a3(vpshufd xmm10, xmm10, 0xb1) + a3(vpshufd xmm11, xmm11, 0xb1) + a3(vpxor xmm6, xmm6, xmm10) + a3(vpxor xmm7, xmm7, xmm11) + a3(vpaddq xmm8, xmm0, xmm6) + a3(vpaddq xmm9, xmm1, xmm7) + a3(vprotq xmm8, xmm8, 13) + a3(vprotq xmm9, xmm9, 13) + a3(vpxor xmm5, xmm5, xmm8) + a3(vpxor xmm4, xmm4, xmm9) + a3(vpaddq xmm10, xmm6, xmm5) + a3(vpaddq xmm11, xmm7, xmm4) + a3(vprotq xmm10, xmm10, 39) + a3(vprotq xmm11, xmm11, 39) + a3(vpxor xmm2, xmm2, xmm10) + a3(vpxor xmm3, xmm3, xmm11) + a3(vpaddq xmm8, xmm5, xmm2) + a3(vpaddq xmm9, xmm4, xmm3) + a3(vpshufd xmm8, xmm8, 0xb1) + a3(vpshufd xmm9, xmm9, 0xb1) + a3(vpxor xmm0, xmm0, xmm8) + a3(vpxor xmm1, xmm1, xmm9) + a2(vmovdqa xmm10, xmm2) + a2(vmovdqa xmm11, xmm3) + a4(vpalignr xmm2, xmm6, xmm7, 8) + a4(vpalignr xmm3, xmm7, xmm6, 8) + a4(vpalignr xmm6, xmm11, xmm10, 8) + a4(vpalignr xmm7, xmm10, xmm11, 8) + a2(sub rax, 2) + aj(ja scrypt_salsa64_xop_loop) + a3(vpaddq xmm0,xmm0,[rsp+0]) + a3(vpaddq xmm1,xmm1,[rsp+16]) + a3(vpaddq xmm2,xmm2,[rsp+32]) + a3(vpaddq xmm3,xmm3,[rsp+48]) + a3(vpaddq xmm4,xmm4,[rsp+64]) + a3(vpaddq xmm5,xmm5,[rsp+80]) + a3(vpaddq xmm6,xmm6,[rsp+96]) + a3(vpaddq xmm7,xmm7,[rsp+112]) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0xff) + a2(add r9,128) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(vmovdqa [rax+0],xmm0) + a2(vmovdqa [rax+16],xmm1) + a2(vmovdqa [rax+32],xmm2) + a2(vmovdqa [rax+48],xmm3) + a2(vmovdqa [rax+64],xmm4) + a2(vmovdqa [rax+80],xmm5) + a2(vmovdqa [rax+96],xmm6) + a2(vmovdqa [rax+112],xmm7) + aj(jne scrypt_ChunkMix_xop_loop) + a2(mov rsp, rbp) + a1(pop rbp) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_xop) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) + +#define SCRYPT_SALSA64_XOP + +static void asm_calling_convention +scrypt_ChunkMix_xop(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + x4 = xmmp[4]; + x5 = xmmp[5]; + x6 = xmmp[6]; + x7 = xmmp[7]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + } + + /* 
2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + x4 = _mm_xor_si128(x4, xmmp[4]); + x5 = _mm_xor_si128(x5, xmmp[5]); + x6 = _mm_xor_si128(x6, xmmp[6]); + x7 = _mm_xor_si128(x7, xmmp[7]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + t4 = x4; + t5 = x5; + t6 = x6; + t7 = x7; + + for (rounds = 8; rounds; rounds -= 2) { + z0 = _mm_add_epi64(x0, x2); + z1 = _mm_add_epi64(x1, x3); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x6 = _mm_xor_si128(x6, z0); + x7 = _mm_xor_si128(x7, z1); + + z0 = _mm_add_epi64(x6, x0); + z1 = _mm_add_epi64(x7, x1); + z0 = _mm_roti_epi64(z0, 13); + z1 = _mm_roti_epi64(z1, 13); + x4 = _mm_xor_si128(x4, z0); + x5 = _mm_xor_si128(x5, z1); + + z0 = _mm_add_epi64(x4, x6); + z1 = _mm_add_epi64(x5, x7); + z0 = _mm_roti_epi64(z0, 39); + z1 = _mm_roti_epi64(z1, 39); + x2 = _mm_xor_si128(x2, z0); + x3 = _mm_xor_si128(x3, z1); + + z0 = _mm_add_epi64(x2, x4); + z1 = _mm_add_epi64(x3, x5); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x0 = _mm_xor_si128(x0, z0); + x1 = _mm_xor_si128(x1, z1); + + z0 = x2; + z1 = x3; + x2 = _mm_alignr_epi8(x6, x7, 8); + x3 = _mm_alignr_epi8(x7, x6, 8); + x6 = _mm_alignr_epi8(z1, z0, 8); + x7 = _mm_alignr_epi8(z0, z1, 8); + + z0 = _mm_add_epi64(x0, x2); + z1 = _mm_add_epi64(x1, x3); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x6 = _mm_xor_si128(x6, z0); + x7 = _mm_xor_si128(x7, z1); + + z0 = _mm_add_epi64(x6, x0); + z1 = _mm_add_epi64(x7, x1); + z0 = _mm_roti_epi64(z0, 13); + z1 = _mm_roti_epi64(z1, 13); + x5 = _mm_xor_si128(x5, z0); + x4 = _mm_xor_si128(x4, z1); + + z0 = _mm_add_epi64(x5, x6); + z1 = _mm_add_epi64(x4, x7); + z0 = _mm_roti_epi64(z0, 39); + z1 = _mm_roti_epi64(z1, 39); + x2 = _mm_xor_si128(x2, z0); + x3 = _mm_xor_si128(x3, z1); + + z0 = _mm_add_epi64(x2, x5); + z1 = _mm_add_epi64(x3, x4); + z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1)); + z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1)); + x0 = _mm_xor_si128(x0, z0); + x1 = _mm_xor_si128(x1, z1); + + z0 = x2; + z1 = x3; + x2 = _mm_alignr_epi8(x6, x7, 8); + x3 = _mm_alignr_epi8(x7, x6, 8); + x6 = _mm_alignr_epi8(z1, z0, 8); + x7 = _mm_alignr_epi8(z0, z1, 8); + } + + x0 = _mm_add_epi64(x0, t0); + x1 = _mm_add_epi64(x1, t1); + x2 = _mm_add_epi64(x2, t2); + x3 = _mm_add_epi64(x3, t3); + x4 = _mm_add_epi64(x4, t4); + x5 = _mm_add_epi64(x5, t5); + x6 = _mm_add_epi64(x6, t6); + x7 = _mm_add_epi64(x7, t7); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + xmmp[4] = x4; + xmmp[5] = x5; + xmmp[6] = x6; + xmmp[7] = x7; + } +} + +#endif + +#if defined(SCRYPT_SALSA64_XOP) + /* uses salsa64_core_tangle_sse2 */ + + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa64/8-XOP" + #undef 
SCRYPT_SALSA64_INCLUDED + #define SCRYPT_SALSA64_INCLUDED +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-mix_salsa64.h b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64.h new file mode 100644 index 0000000..2aec04f --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-mix_salsa64.h @@ -0,0 +1,41 @@ +#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED) + +#undef SCRYPT_MIX +#define SCRYPT_MIX "Salsa64/8 Ref" + +#undef SCRYPT_SALSA64_INCLUDED +#define SCRYPT_SALSA64_INCLUDED +#define SCRYPT_SALSA64_BASIC + +static void +salsa64_core_basic(uint64_t state[16]) { + const size_t rounds = 8; + uint64_t v[16], t; + size_t i; + + for (i = 0; i < 16; i++) v[i] = state[i]; + + #define G(a,b,c,d) \ + t = v[a]+v[d]; t = ROTL64(t, 32); v[b] ^= t; \ + t = v[b]+v[a]; t = ROTL64(t, 13); v[c] ^= t; \ + t = v[c]+v[b]; t = ROTL64(t, 39); v[d] ^= t; \ + t = v[d]+v[c]; t = ROTL64(t, 32); v[a] ^= t; \ + + for (i = 0; i < rounds; i += 2) { + G( 0, 4, 8,12); + G( 5, 9,13, 1); + G(10,14, 2, 6); + G(15, 3, 7,11); + G( 0, 1, 2, 3); + G( 5, 6, 7, 4); + G(10,11, 8, 9); + G(15,12,13,14); + } + + for (i = 0; i < 16; i++) state[i] += v[i]; + + #undef G +} + +#endif + diff --git a/bf/scrypt-jane/code/scrypt-jane-pbkdf2.h b/bf/scrypt-jane/code/scrypt-jane-pbkdf2.h new file mode 100644 index 0000000..711e3d6 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-pbkdf2.h @@ -0,0 +1,112 @@ +typedef struct scrypt_hmac_state_t { + scrypt_hash_state inner, outer; +} scrypt_hmac_state; + + +static void +scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) { + scrypt_hash_state st; + scrypt_hash_init(&st); + scrypt_hash_update(&st, m, mlen); + scrypt_hash_finish(&st, hash); +} + +/* hmac */ +static void +scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) { + uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0}; + size_t i; + + scrypt_hash_init(&st->inner); + scrypt_hash_init(&st->outer); + + if (keylen <= SCRYPT_HASH_BLOCK_SIZE) { + /* use the key directly if it's <= blocksize bytes */ + memcpy(pad, key, keylen); + } else { + /* if it's > blocksize bytes, hash it */ + scrypt_hash(pad, key, keylen); + } + + /* inner = (key ^ 0x36) */ + /* h(inner || ...) */ + for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) + pad[i] ^= 0x36; + scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE); + + /* outer = (key ^ 0x5c) */ + /* h(outer || ...) */ + for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) + pad[i] ^= (0x5c ^ 0x36); + scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE); + + scrypt_ensure_zero(pad, sizeof(pad)); +} + +static void +scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) { + /* h(inner || m...) */ + scrypt_hash_update(&st->inner, m, mlen); +} + +static void +scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) { + /* h(inner || m) */ + scrypt_hash_digest innerhash; + scrypt_hash_finish(&st->inner, innerhash); + + /* h(outer || h(inner || m)) */ + scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash)); + scrypt_hash_finish(&st->outer, mac); + + scrypt_ensure_zero(st, sizeof(*st)); +} + +static void +scrypt_pbkdf2(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *out, size_t bytes) { + scrypt_hmac_state hmac_pw, hmac_pw_salt, work; + scrypt_hash_digest ti, u; + uint8_t be[4]; + uint32_t i, j, blocks; + uint64_t c; + + /* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */ + + /* hmac(password, ...) 
*/ + scrypt_hmac_init(&hmac_pw, password, password_len); + + /* hmac(password, salt...) */ + hmac_pw_salt = hmac_pw; + scrypt_hmac_update(&hmac_pw_salt, salt, salt_len); + + blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE; + for (i = 1; i <= blocks; i++) { + /* U1 = hmac(password, salt || be(i)) */ + U32TO8_BE(be, i); + work = hmac_pw_salt; + scrypt_hmac_update(&work, be, 4); + scrypt_hmac_finish(&work, ti); + memcpy(u, ti, sizeof(u)); + + /* T[i] = U1 ^ U2 ^ U3... */ + for (c = 0; c < N - 1; c++) { + /* UX = hmac(password, U{X-1}) */ + work = hmac_pw; + scrypt_hmac_update(&work, u, SCRYPT_HASH_DIGEST_SIZE); + scrypt_hmac_finish(&work, u); + + /* T[i] ^= UX */ + for (j = 0; j < sizeof(u); j++) + ti[j] ^= u[j]; + } + + memcpy(out, ti, (bytes > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : bytes); + out += SCRYPT_HASH_DIGEST_SIZE; + bytes -= SCRYPT_HASH_DIGEST_SIZE; + } + + scrypt_ensure_zero(ti, sizeof(ti)); + scrypt_ensure_zero(u, sizeof(u)); + scrypt_ensure_zero(&hmac_pw, sizeof(hmac_pw)); + scrypt_ensure_zero(&hmac_pw_salt, sizeof(hmac_pw_salt)); +} diff --git a/bf/scrypt-jane/code/scrypt-jane-portable-x86.h b/bf/scrypt-jane/code/scrypt-jane-portable-x86.h new file mode 100644 index 0000000..396a7bd --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-portable-x86.h @@ -0,0 +1,462 @@ +#if defined(CPU_X86) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC)) + #define X86ASM + + /* gcc 2.95 royally screws up stack alignments on variables */ + #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS6PP)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 30000))) + #define X86ASM_SSE + #define X86ASM_SSE2 + #endif + #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2005)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40102))) + #define X86ASM_SSSE3 + #endif + #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40400))) + #define X86ASM_AVX + #define X86ASM_XOP + #endif + #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2012)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40700))) + #define X86ASM_AVX2 + #endif +#endif + +#if defined(CPU_X86_64) && defined(COMPILER_GCC) + #define X86_64ASM + #define X86_64ASM_SSE2 + #if (COMPILER_GCC >= 40102) + #define X86_64ASM_SSSE3 + #endif + #if (COMPILER_GCC >= 40400) + #define X86_64ASM_AVX + #define X86_64ASM_XOP + #endif + #if (COMPILER_GCC >= 40700) + #define X86_64ASM_AVX2 + #endif +#endif + +#if defined(COMPILER_MSVC) && (defined(CPU_X86_FORCE_INTRINSICS) || defined(CPU_X86_64)) + #define X86_INTRINSIC + #if defined(CPU_X86_64) || defined(X86ASM_SSE) + #define X86_INTRINSIC_SSE + #endif + #if defined(CPU_X86_64) || defined(X86ASM_SSE2) + #define X86_INTRINSIC_SSE2 + #endif + #if (COMPILER_MSVC >= COMPILER_MSVC_VS2005) + #define X86_INTRINSIC_SSSE3 + #endif + #if (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1) + #define X86_INTRINSIC_AVX + #define X86_INTRINSIC_XOP + #endif + #if (COMPILER_MSVC >= COMPILER_MSVC_VS2012) + #define X86_INTRINSIC_AVX2 + #endif +#endif + +#if defined(COMPILER_GCC) && defined(CPU_X86_FORCE_INTRINSICS) + #define X86_INTRINSIC + #if defined(__SSE__) + #define X86_INTRINSIC_SSE + #endif + #if defined(__SSE2__) + #define X86_INTRINSIC_SSE2 + #endif + #if defined(__SSSE3__) + #define X86_INTRINSIC_SSSE3 + #endif + #if defined(__AVX__) + #define X86_INTRINSIC_AVX + #endif + #if defined(__XOP__) + #define X86_INTRINSIC_XOP + #endif + #if defined(__AVX2__) + #define 
X86_INTRINSIC_AVX2 + #endif +#endif + +/* only use simd on windows (or SSE2 on gcc)! */ +#if defined(CPU_X86_FORCE_INTRINSICS) || defined(X86_INTRINSIC) + #if defined(X86_INTRINSIC_SSE) + #include + #include + typedef __m64 qmm; + typedef __m128 xmm; + typedef __m128d xmmd; + #endif + #if defined(X86_INTRINSIC_SSE2) + #include + typedef __m128i xmmi; + #endif + #if defined(X86_INTRINSIC_SSSE3) + #include + #endif + #if defined(X86_INTRINSIC_AVX) + #include + #endif + #if defined(X86_INTRINSIC_XOP) + #if defined(COMPILER_MSVC) + #include + #else + #include + #endif + #endif + #if defined(X86_INTRINSIC_AVX2) + typedef __m256i ymmi; + #endif +#endif + +#if defined(X86_INTRINSIC_SSE2) + typedef union packedelem8_t { + uint8_t u[16]; + xmmi v; + } packedelem8; + + typedef union packedelem32_t { + uint32_t u[4]; + xmmi v; + } packedelem32; + + typedef union packedelem64_t { + uint64_t u[2]; + xmmi v; + } packedelem64; +#else + typedef union packedelem8_t { + uint8_t u[16]; + uint32_t dw[4]; + } packedelem8; + + typedef union packedelem32_t { + uint32_t u[4]; + uint8_t b[16]; + } packedelem32; + + typedef union packedelem64_t { + uint64_t u[2]; + uint8_t b[16]; + } packedelem64; +#endif + +#if defined(X86_INTRINSIC_SSSE3) + static const packedelem8 ALIGN(16) ssse3_rotl16_32bit = {{2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13}}; + static const packedelem8 ALIGN(16) ssse3_rotl8_32bit = {{3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14}}; +#endif + +/* + x86 inline asm for gcc/msvc. usage: + + asm_naked_fn_proto(return_type, name) (type parm1, type parm2..) + asm_naked_fn(name) + a1(..) + a2(.., ..) + a3(.., .., ..) + 64bit OR 0 paramters: a1(ret) + 32bit AND n parameters: aret(4n), eg aret(16) for 4 parameters + asm_naked_fn_end(name) +*/ + +#if defined(X86ASM) || defined(X86_64ASM) + +#if defined(COMPILER_MSVC) + #pragma warning(disable : 4731) /* frame pointer modified by inline assembly */ + #define a1(x) __asm {x} + #define a2(x, y) __asm {x, y} + #define a3(x, y, z) __asm {x, y, z} + #define a4(x, y, z, w) __asm {x, y, z, w} + #define aj(x) __asm {x} + #define asm_align8 a1(ALIGN 8) + #define asm_align16 a1(ALIGN 16) + + #define asm_calling_convention STDCALL + #define aret(n) a1(ret n) + #define asm_naked_fn_proto(type, fn) static NAKED type asm_calling_convention fn + #define asm_naked_fn(fn) { + #define asm_naked_fn_end(fn) } +#elif defined(COMPILER_GCC) + #define GNU_AS1(x) #x ";\n" + #define GNU_AS2(x, y) #x ", " #y ";\n" + #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";\n" + #define GNU_AS4(x, y, z, w) #x ", " #y ", " #z ", " #w ";\n" + #define GNU_ASFN(x) "\n_" #x ":\n" #x ":\n" + #define GNU_ASJ(x) ".att_syntax prefix\n" #x "\n.intel_syntax noprefix\n" + + #define a1(x) GNU_AS1(x) + #define a2(x, y) GNU_AS2(x, y) + #define a3(x, y, z) GNU_AS3(x, y, z) + #define a4(x, y, z, w) GNU_AS4(x, y, z, w) + #define aj(x) GNU_ASJ(x) + #define asm_align8 ".p2align 3,,7" + #define asm_align16 ".p2align 4,,15" + + #if defined(OS_WINDOWS) + #define asm_calling_convention CDECL + #define aret(n) a1(ret) + + #if defined(X86_64ASM) + #define asm_naked_fn(fn) ; __asm__ ( \ + ".text\n" \ + asm_align16 GNU_ASFN(fn) \ + "subq $136, %rsp;" \ + "movdqa %xmm6, 0(%rsp);" \ + "movdqa %xmm7, 16(%rsp);" \ + "movdqa %xmm8, 32(%rsp);" \ + "movdqa %xmm9, 48(%rsp);" \ + "movdqa %xmm10, 64(%rsp);" \ + "movdqa %xmm11, 80(%rsp);" \ + "movdqa %xmm12, 96(%rsp);" \ + "movq %rdi, 112(%rsp);" \ + "movq %rsi, 120(%rsp);" \ + "movq %rcx, %rdi;" \ + "movq %rdx, %rsi;" \ + "movq %r8, %rdx;" \ + "movq %r9, %rcx;" \ + "call 1f;" \ + "movdqa 0(%rsp), 
%xmm6;" \ + "movdqa 16(%rsp), %xmm7;" \ + "movdqa 32(%rsp), %xmm8;" \ + "movdqa 48(%rsp), %xmm9;" \ + "movdqa 64(%rsp), %xmm10;" \ + "movdqa 80(%rsp), %xmm11;" \ + "movdqa 96(%rsp), %xmm12;" \ + "movq 112(%rsp), %rdi;" \ + "movq 120(%rsp), %rsi;" \ + "addq $136, %rsp;" \ + "ret;" \ + ".intel_syntax noprefix;" \ + ".p2align 4,,15;" \ + "1:;" + #else + #define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn) + #endif + #else + #define asm_calling_convention STDCALL + #define aret(n) a1(ret n) + #define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn) + #endif + + #define asm_naked_fn_proto(type, fn) extern type asm_calling_convention fn + #define asm_naked_fn_end(fn) ".att_syntax prefix;\n" ); + + #define asm_gcc() __asm__ __volatile__(".intel_syntax noprefix;\n" + #define asm_gcc_parms() ".att_syntax prefix;" + #define asm_gcc_trashed() __asm__ __volatile__("" ::: + #define asm_gcc_end() ); +#else + need x86 asm +#endif + +#endif /* X86ASM || X86_64ASM */ + + +#if defined(CPU_X86) || defined(CPU_X86_64) + +typedef enum cpu_flags_x86_t { + cpu_mmx = 1 << 0, + cpu_sse = 1 << 1, + cpu_sse2 = 1 << 2, + cpu_sse3 = 1 << 3, + cpu_ssse3 = 1 << 4, + cpu_sse4_1 = 1 << 5, + cpu_sse4_2 = 1 << 6, + cpu_avx = 1 << 7, + cpu_xop = 1 << 8, + cpu_avx2 = 1 << 9 +} cpu_flags_x86; + +typedef enum cpu_vendors_x86_t { + cpu_nobody, + cpu_intel, + cpu_amd +} cpu_vendors_x86; + +typedef struct x86_regs_t { + uint32_t eax, ebx, ecx, edx; +} x86_regs; + +#if defined(X86ASM) +asm_naked_fn_proto(int, has_cpuid)(void) +asm_naked_fn(has_cpuid) + a1(pushfd) + a1(pop eax) + a2(mov ecx, eax) + a2(xor eax, 0x200000) + a1(push eax) + a1(popfd) + a1(pushfd) + a1(pop eax) + a2(xor eax, ecx) + a2(shr eax, 21) + a2(and eax, 1) + a1(push ecx) + a1(popfd) + a1(ret) +asm_naked_fn_end(has_cpuid) +#endif /* X86ASM */ + + +static void NOINLINE +get_cpuid(x86_regs *regs, uint32_t flags) { +#if defined(COMPILER_MSVC) + __cpuid((int *)regs, (int)flags); +#else + #if defined(CPU_X86_64) + #define cpuid_bx rbx + #else + #define cpuid_bx ebx + #endif + + asm_gcc() + a1(push cpuid_bx) + a2(xor ecx, ecx) + a1(cpuid) + a2(mov [%1 + 0], eax) + a2(mov [%1 + 4], ebx) + a2(mov [%1 + 8], ecx) + a2(mov [%1 + 12], edx) + a1(pop cpuid_bx) + asm_gcc_parms() : "+a"(flags) : "S"(regs) : "%ecx", "%edx", "cc" + asm_gcc_end() +#endif +} + +#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX) +static uint64_t NOINLINE +get_xgetbv(uint32_t flags) { +#if defined(COMPILER_MSVC) + return _xgetbv(flags); +#else + uint32_t lo, hi; + asm_gcc() + a1(xgetbv) + asm_gcc_parms() : "+c"(flags), "=a" (lo), "=d" (hi) + asm_gcc_end() + return ((uint64_t)lo | ((uint64_t)hi << 32)); +#endif +} +#endif // AVX support + +#if defined(SCRYPT_TEST_SPEED) +size_t cpu_detect_mask = (size_t)-1; +#endif + +static size_t +detect_cpu(void) { + union { uint8_t s[12]; uint32_t i[3]; } vendor_string; + cpu_vendors_x86 vendor = cpu_nobody; + x86_regs regs; + uint32_t max_level, max_ext_level; + size_t cpu_flags = 0; +#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX) + uint64_t xgetbv_flags; +#endif + +#if defined(CPU_X86) + if (!has_cpuid()) + return cpu_flags; +#endif + + get_cpuid(®s, 0); + max_level = regs.eax; + vendor_string.i[0] = regs.ebx; + vendor_string.i[1] = regs.edx; + vendor_string.i[2] = regs.ecx; + + if (scrypt_verify(vendor_string.s, (const uint8_t *)"GenuineIntel", 12)) + vendor = cpu_intel; + else if (scrypt_verify(vendor_string.s, (const uint8_t *)"AuthenticAMD", 12)) + vendor = cpu_amd; + + if 
(max_level & 0x00000500) { + /* "Intel P5 pre-B0" */ + cpu_flags |= cpu_mmx; + return cpu_flags; + } + + if (max_level < 1) + return cpu_flags; + + get_cpuid(®s, 1); +#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX) + /* xsave/xrestore */ + if (regs.ecx & (1 << 27)) { + xgetbv_flags = get_xgetbv(0); + if ((regs.ecx & (1 << 28)) && (xgetbv_flags & 0x6)) cpu_flags |= cpu_avx; + } +#endif + if (regs.ecx & (1 << 20)) cpu_flags |= cpu_sse4_2; + if (regs.ecx & (1 << 19)) cpu_flags |= cpu_sse4_2; + if (regs.ecx & (1 << 9)) cpu_flags |= cpu_ssse3; + if (regs.ecx & (1 )) cpu_flags |= cpu_sse3; + if (regs.edx & (1 << 26)) cpu_flags |= cpu_sse2; + if (regs.edx & (1 << 25)) cpu_flags |= cpu_sse; + if (regs.edx & (1 << 23)) cpu_flags |= cpu_mmx; + + if (cpu_flags & cpu_avx) { + if (max_level >= 7) { + get_cpuid(®s, 7); + if (regs.ebx & (1 << 5)) cpu_flags |= cpu_avx2; + } + + get_cpuid(®s, 0x80000000); + max_ext_level = regs.eax; + if (max_ext_level >= 0x80000001) { + get_cpuid(®s, 0x80000001); + if (regs.ecx & (1 << 11)) cpu_flags |= cpu_xop; + } + } + + +#if defined(SCRYPT_TEST_SPEED) + cpu_flags &= cpu_detect_mask; +#endif + + return cpu_flags; +} + +#if defined(SCRYPT_TEST_SPEED) +static const char * +get_top_cpuflag_desc(size_t flag) { + if (flag & cpu_avx2) return "AVX2"; + else if (flag & cpu_xop) return "XOP"; + else if (flag & cpu_avx) return "AVX"; + else if (flag & cpu_sse4_2) return "SSE4.2"; + else if (flag & cpu_sse4_1) return "SSE4.1"; + else if (flag & cpu_ssse3) return "SSSE3"; + else if (flag & cpu_sse2) return "SSE2"; + else if (flag & cpu_sse) return "SSE"; + else if (flag & cpu_mmx) return "MMX"; + else return "Basic"; +} +#endif + +/* enable the highest system-wide option */ +#if defined(SCRYPT_CHOOSE_COMPILETIME) + #if !defined(__AVX2__) + #undef X86_64ASM_AVX2 + #undef X86ASM_AVX2 + #undef X86_INTRINSIC_AVX2 + #endif + #if !defined(__XOP__) + #undef X86_64ASM_XOP + #undef X86ASM_XOP + #undef X86_INTRINSIC_XOP + #endif + #if !defined(__AVX__) + #undef X86_64ASM_AVX + #undef X86ASM_AVX + #undef X86_INTRINSIC_AVX + #endif + #if !defined(__SSSE3__) + #undef X86_64ASM_SSSE3 + #undef X86ASM_SSSE3 + #undef X86_INTRINSIC_SSSE3 + #endif + #if !defined(__SSE2__) + #undef X86_64ASM_SSE2 + #undef X86ASM_SSE2 + #undef X86_INTRINSIC_SSE2 + #endif +#endif + +#endif /* defined(CPU_X86) || defined(CPU_X86_64) */ \ No newline at end of file diff --git a/bf/scrypt-jane/code/scrypt-jane-portable.h b/bf/scrypt-jane/code/scrypt-jane-portable.h new file mode 100644 index 0000000..e83e314 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-portable.h @@ -0,0 +1,307 @@ +/* determine os */ +#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__) + #include + #include + #define OS_WINDOWS +#elif defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__) + #include + #include + #include + + #define OS_SOLARIS +#else + #include + #include + #include /* need this to define BSD */ + #include + #include + + #define OS_NIX + #if defined(__linux__) + #include + #define OS_LINUX + #elif defined(BSD) + #define OS_BSD + + #if defined(MACOS_X) || (defined(__APPLE__) & defined(__MACH__)) + #define OS_OSX + #elif defined(macintosh) || defined(Macintosh) + #define OS_MAC + #elif defined(__OpenBSD__) + #define OS_OPENBSD + #endif + #endif +#endif + + +/* determine compiler */ +#if defined(_MSC_VER) + #define COMPILER_MSVC_VS6 120000000 + #define COMPILER_MSVC_VS6PP 121000000 + #define COMPILER_MSVC_VS2002 130000000 + #define COMPILER_MSVC_VS2003 131000000 + #define 
COMPILER_MSVC_VS2005 140050727 + #define COMPILER_MSVC_VS2008 150000000 + #define COMPILER_MSVC_VS2008SP1 150030729 + #define COMPILER_MSVC_VS2010 160000000 + #define COMPILER_MSVC_VS2010SP1 160040219 + #define COMPILER_MSVC_VS2012RC 170000000 + #define COMPILER_MSVC_VS2012 170050727 + + #if _MSC_FULL_VER > 100000000 + #define COMPILER_MSVC (_MSC_FULL_VER) + #else + #define COMPILER_MSVC (_MSC_FULL_VER * 10) + #endif + + #if ((_MSC_VER == 1200) && defined(_mm_free)) + #undef COMPILER_MSVC + #define COMPILER_MSVC COMPILER_MSVC_VS6PP + #endif + + #pragma warning(disable : 4127) /* conditional expression is constant */ + #pragma warning(disable : 4100) /* unreferenced formal parameter */ + + #define _CRT_SECURE_NO_WARNINGS + #include + #include /* _rotl */ + #include + + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + typedef signed int int32_t; + typedef unsigned __int64 uint64_t; + typedef signed __int64 int64_t; + + #define ROTL32(a,b) _rotl(a,b) + #define ROTR32(a,b) _rotr(a,b) + #define ROTL64(a,b) _rotl64(a,b) + #define ROTR64(a,b) _rotr64(a,b) + #undef NOINLINE + #define NOINLINE __declspec(noinline) + #undef NORETURN + #define NORETURN + #undef INLINE + #define INLINE __forceinline + #undef FASTCALL + #define FASTCALL __fastcall + #undef CDECL + #define CDECL __cdecl + #undef STDCALL + #define STDCALL __stdcall + #undef NAKED + #define NAKED __declspec(naked) + #define ALIGN(n) __declspec(align(n)) +#endif +#if defined(__ICC) + #define COMPILER_INTEL +#endif +#if defined(__GNUC__) + #if (__GNUC__ >= 3) + #define COMPILER_GCC_PATCHLEVEL __GNUC_PATCHLEVEL__ + #else + #define COMPILER_GCC_PATCHLEVEL 0 + #endif + #define COMPILER_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + COMPILER_GCC_PATCHLEVEL) + #define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b))) + #define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b))) + #define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b))) + #define ROTR64(a,b) (((a) >> (b)) | ((a) << (64 - b))) + #undef NOINLINE + #if (COMPILER_GCC >= 30000) + #define NOINLINE __attribute__((noinline)) + #else + #define NOINLINE + #endif + #undef NORETURN + #if (COMPILER_GCC >= 30000) + #define NORETURN __attribute__((noreturn)) + #else + #define NORETURN + #endif + #undef INLINE + #if (COMPILER_GCC >= 30000) + #define INLINE __attribute__((always_inline)) + #else + #define INLINE inline + #endif + #undef FASTCALL + #if (COMPILER_GCC >= 30400) + #define FASTCALL __attribute__((fastcall)) + #else + #define FASTCALL + #endif + #undef CDECL + #define CDECL __attribute__((cdecl)) + #undef STDCALL + #define STDCALL __attribute__((stdcall)) + #define ALIGN(n) __attribute__((aligned(n))) + #include +#endif +#if defined(__MINGW32__) || defined(__MINGW64__) + #define COMPILER_MINGW +#endif +#if defined(__PATHCC__) + #define COMPILER_PATHCC +#endif + +#define OPTIONAL_INLINE +#if defined(OPTIONAL_INLINE) + #undef OPTIONAL_INLINE + #define OPTIONAL_INLINE INLINE +#else + #define OPTIONAL_INLINE +#endif + +#define CRYPTO_FN NOINLINE STDCALL + +/* determine cpu */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64) + #define CPU_X86_64 +#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500)) + #define CPU_X86 500 +#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400)) + #define CPU_X86 400 +#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__) + #define CPU_X86 300 +#elif defined(__ia64__) || 
defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64) + #define CPU_IA64 +#endif + +#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9) + #define CPU_SPARC + #if defined(__sparcv9) + #define CPU_SPARC64 + #endif +#endif + +#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64)) + #define CPU_64BITS + #undef FASTCALL + #define FASTCALL + #undef CDECL + #define CDECL + #undef STDCALL + #define STDCALL +#endif + +#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC) + #define CPU_PPC + #if defined(_ARCH_PWR7) + #define CPU_POWER7 + #elif defined(__64BIT__) + #define CPU_PPC64 + #else + #define CPU_PPC32 + #endif +#endif + +#if defined(__hppa__) || defined(__hppa) + #define CPU_HPPA +#endif + +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) + #define CPU_ALPHA +#endif + +/* endian */ + +#if ((defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \ + (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \ + (defined(CPU_X86) || defined(CPU_X86_64)) || \ + (defined(vax) || defined(MIPSEL) || defined(_MIPSEL))) +#define CPU_LE +#elif ((defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) || \ + (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \ + (defined(CPU_SPARC) || defined(CPU_PPC) || defined(mc68000) || defined(sel)) || defined(_MIPSEB)) +#define CPU_BE +#else + /* unknown endian! */ +#endif + + +#define U8TO32_BE(p) \ + (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \ + ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) )) + +#define U8TO32_LE(p) \ + (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \ + ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) + +#define U32TO8_BE(p, v) \ + (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \ + (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) ); + +#define U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v) ); (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); (p)[3] = (uint8_t)((v) >> 24); + +#define U8TO64_BE(p) \ + (((uint64_t)U8TO32_BE(p) << 32) | (uint64_t)U8TO32_BE((p) + 4)) + +#define U8TO64_LE(p) \ + (((uint64_t)U8TO32_LE(p)) | ((uint64_t)U8TO32_LE((p) + 4) << 32)) + +#define U64TO8_BE(p, v) \ + U32TO8_BE((p), (uint32_t)((v) >> 32)); \ + U32TO8_BE((p) + 4, (uint32_t)((v) )); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (uint32_t)((v) )); \ + U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); + +#define U32_SWAP(v) { \ + (v) = (((v) << 8) & 0xFF00FF00 ) | (((v) >> 8) & 0xFF00FF ); \ + (v) = ((v) << 16) | ((v) >> 16); \ +} + +#define U64_SWAP(v) { \ + (v) = (((v) << 8) & 0xFF00FF00FF00FF00ull ) | (((v) >> 8) & 0x00FF00FF00FF00FFull ); \ + (v) = (((v) << 16) & 0xFFFF0000FFFF0000ull ) | (((v) >> 16) & 0x0000FFFF0000FFFFull ); \ + (v) = ((v) << 32) | ((v) >> 32); \ +} + +static int +scrypt_verify(const uint8_t *x, const uint8_t *y, size_t len) { + uint32_t differentbits = 0; + while (len--) + differentbits |= (*x++ ^ *y++); + return (1 & ((differentbits - 1) >> 8)); +} + +static void +scrypt_ensure_zero(void *p, size_t len) { +#if ((defined(CPU_X86) || defined(CPU_X86_64)) && defined(COMPILER_MSVC)) + __stosb((unsigned char *)p, 0, len); +#elif (defined(CPU_X86) && defined(COMPILER_GCC)) + __asm__ 
__volatile__( + "pushl %%edi;\n" + "pushl %%ecx;\n" + "rep stosb;\n" + "popl %%ecx;\n" + "popl %%edi;\n" + :: "a"(0), "D"(p), "c"(len) : "cc", "memory" + ); +#elif (defined(CPU_X86_64) && defined(COMPILER_GCC)) + __asm__ __volatile__( + "pushq %%rdi;\n" + "pushq %%rcx;\n" + "rep stosb;\n" + "popq %%rcx;\n" + "popq %%rdi;\n" + :: "a"(0), "D"(p), "c"(len) : "cc", "memory" + ); +#else + volatile uint8_t *b = (volatile uint8_t *)p; + size_t i; + for (i = 0; i < len; i++) + b[i] = 0; +#endif +} + +#include "scrypt-jane-portable-x86.h" + +#if !defined(asm_calling_convention) +#define asm_calling_convention +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-romix-basic.h b/bf/scrypt-jane/code/scrypt-jane-romix-basic.h new file mode 100644 index 0000000..57ba649 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-romix-basic.h @@ -0,0 +1,74 @@ +#if !defined(SCRYPT_CHOOSE_COMPILETIME) +/* function type returned by scrypt_getROMix, used with cpu detection */ +typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r); +#endif + +/* romix pre/post nop function */ +static void asm_calling_convention +scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) { + (void)blocks; (void)nblocks; +} + +/* romix pre/post endian conversion function */ +static void asm_calling_convention +scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) { +#if !defined(CPU_LE) + static const union { uint8_t b[2]; uint16_t w; } endian_test = {{1,0}}; + size_t i; + if (endian_test.w == 0x100) { + nblocks *= SCRYPT_BLOCK_WORDS; + for (i = 0; i < nblocks; i++) { + SCRYPT_WORD_ENDIAN_SWAP(blocks[i]); + } + } +#else + (void)blocks; (void)nblocks; +#endif +} + +/* chunkmix test function */ +typedef void (asm_calling_convention *chunkmixfn)(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r); +typedef void (asm_calling_convention *blockfixfn)(scrypt_mix_word_t *blocks, size_t nblocks); + +static int +scrypt_test_mix_instance(chunkmixfn mixfn, blockfixfn prefn, blockfixfn postfn, const uint8_t expected[16]) { + /* r = 2, (2 * r) = 4 blocks in a chunk, 4 * SCRYPT_BLOCK_WORDS total */ + const uint32_t r = 2, blocks = 2 * r, words = blocks * SCRYPT_BLOCK_WORDS; +#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2)) + scrypt_mix_word_t ALIGN(32) chunk[2][4 * SCRYPT_BLOCK_WORDS], v; +#else + scrypt_mix_word_t ALIGN(16) chunk[2][4 * SCRYPT_BLOCK_WORDS], v; +#endif + uint8_t final[16]; + size_t i; + + for (i = 0; i < words; i++) { + v = (scrypt_mix_word_t)i; + v = (v << 8) | v; + v = (v << 16) | v; + chunk[0][i] = v; + } + + prefn(chunk[0], blocks); + mixfn(chunk[1], chunk[0], NULL, r); + postfn(chunk[1], blocks); + + /* grab the last 16 bytes of the final block */ + for (i = 0; i < 16; i += sizeof(scrypt_mix_word_t)) { + SCRYPT_WORDTO8_LE(final + i, chunk[1][words - (16 / sizeof(scrypt_mix_word_t)) + (i / sizeof(scrypt_mix_word_t))]); + } + + return scrypt_verify(expected, final, 16); +} + +/* returns a pointer to item i, where item is len scrypt_mix_word_t's long */ +static scrypt_mix_word_t * +scrypt_item(scrypt_mix_word_t *base, scrypt_mix_word_t i, scrypt_mix_word_t len) { + return base + (i * len); +} + +/* returns a pointer to block i */ +static scrypt_mix_word_t * +scrypt_block(scrypt_mix_word_t *base, scrypt_mix_word_t i) { + return base + (i * SCRYPT_BLOCK_WORDS); +} diff 
--git a/bf/scrypt-jane/code/scrypt-jane-romix-template.h b/bf/scrypt-jane/code/scrypt-jane-romix-template.h new file mode 100644 index 0000000..6bbda62 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-romix-template.h @@ -0,0 +1,122 @@ +#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) + +#if defined(SCRYPT_CHOOSE_COMPILETIME) +#undef SCRYPT_ROMIX_FN +#define SCRYPT_ROMIX_FN scrypt_ROMix +#endif + +#undef SCRYPT_HAVE_ROMIX +#define SCRYPT_HAVE_ROMIX + +#if !defined(SCRYPT_CHUNKMIX_FN) + +#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_basic + +/* + Bout = ChunkMix(Bin) + + 2*r: number of blocks in the chunk +*/ +static void asm_calling_convention +SCRYPT_CHUNKMIX_FN(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r) { +#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2)) + scrypt_mix_word_t ALIGN(32) X[SCRYPT_BLOCK_WORDS], *block; +#else + scrypt_mix_word_t ALIGN(16) X[SCRYPT_BLOCK_WORDS], *block; +#endif + uint32_t i, j, blocksPerChunk = r * 2, half = 0; + + /* 1: X = B_{2r - 1} */ + block = scrypt_block(Bin, blocksPerChunk - 1); + for (i = 0; i < SCRYPT_BLOCK_WORDS; i++) + X[i] = block[i]; + + if (Bxor) { + block = scrypt_block(Bxor, blocksPerChunk - 1); + for (i = 0; i < SCRYPT_BLOCK_WORDS; i++) + X[i] ^= block[i]; + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + block = scrypt_block(Bin, i); + for (j = 0; j < SCRYPT_BLOCK_WORDS; j++) + X[j] ^= block[j]; + + if (Bxor) { + block = scrypt_block(Bxor, i); + for (j = 0; j < SCRYPT_BLOCK_WORDS; j++) + X[j] ^= block[j]; + } + SCRYPT_MIX_FN(X); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + block = scrypt_block(Bout, (i / 2) + half); + for (j = 0; j < SCRYPT_BLOCK_WORDS; j++) + block[j] = X[j]; + } +} +#endif + +/* + X = ROMix(X) + + X: chunk to mix + Y: scratch chunk + N: number of rounds + V[N]: array of chunks to randomly index in to + 2*r: number of blocks in a chunk +*/ + +static void NOINLINE FASTCALL +SCRYPT_ROMIX_FN(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[N * chunkWords]*/, uint32_t N, uint32_t r) { + uint32_t i, j, chunkWords = (uint32_t)(SCRYPT_BLOCK_WORDS * r * 2); + scrypt_mix_word_t *block = V; + + SCRYPT_ROMIX_TANGLE_FN(X, r * 2); + + /* 1: X = B */ + /* implicit */ + + /* 2: for i = 0 to N - 1 do */ + memcpy(block, X, chunkWords * sizeof(scrypt_mix_word_t)); + for (i = 0; i < N - 1; i++, block += chunkWords) { + /* 3: V_i = X */ + /* 4: X = H(X) */ + SCRYPT_CHUNKMIX_FN(block + chunkWords, block, NULL, r); + } + SCRYPT_CHUNKMIX_FN(X, block, NULL, r); + + /* 6: for i = 0 to N - 1 do */ + for (i = 0; i < N; i += 2) { + /* 7: j = Integerify(X) % N */ + j = X[chunkWords - SCRYPT_BLOCK_WORDS] & (N - 1); + + /* 8: X = H(Y ^ V_j) */ + SCRYPT_CHUNKMIX_FN(Y, X, scrypt_item(V, j, chunkWords), r); + + /* 7: j = Integerify(Y) % N */ + j = Y[chunkWords - SCRYPT_BLOCK_WORDS] & (N - 1); + + /* 8: X = H(Y ^ V_j) */ + SCRYPT_CHUNKMIX_FN(X, Y, scrypt_item(V, j, chunkWords), r); + } + + /* 10: B' = X */ + /* implicit */ + + SCRYPT_ROMIX_UNTANGLE_FN(X, r * 2); +} + +#endif /* !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) */ + + +#undef SCRYPT_CHUNKMIX_FN +#undef SCRYPT_ROMIX_FN +#undef SCRYPT_MIX_FN +#undef SCRYPT_ROMIX_TANGLE_FN +#undef SCRYPT_ROMIX_UNTANGLE_FN + diff --git a/bf/scrypt-jane/code/scrypt-jane-romix.h 
b/bf/scrypt-jane/code/scrypt-jane-romix.h new file mode 100644 index 0000000..84cf612 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-romix.h @@ -0,0 +1,27 @@ +#if defined(SCRYPT_CHACHA) +#include "scrypt-jane-chacha.h" +#elif defined(SCRYPT_SALSA) +#include "scrypt-jane-salsa.h" +#elif defined(SCRYPT_SALSA64) +#include "scrypt-jane-salsa64.h" +#else + #define SCRYPT_MIX_BASE "ERROR" + typedef uint32_t scrypt_mix_word_t; + #define SCRYPT_WORDTO8_LE U32TO8_LE + #define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP + #define SCRYPT_BLOCK_BYTES 64 + #define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) + #if !defined(SCRYPT_CHOOSE_COMPILETIME) + static void FASTCALL scrypt_ROMix_error(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r) {} + static scrypt_ROMixfn scrypt_getROMix(void) { return scrypt_ROMix_error; } + #else + static void FASTCALL scrypt_ROMix(scrypt_mix_word_t *X, scrypt_mix_word_t *Y, scrypt_mix_word_t *V, uint32_t N, uint32_t r) {} + #endif + static int scrypt_test_mix(void) { return 0; } + #error must define a mix function! +#endif + +#if !defined(SCRYPT_CHOOSE_COMPILETIME) +#undef SCRYPT_MIX +#define SCRYPT_MIX SCRYPT_MIX_BASE +#endif diff --git a/bf/scrypt-jane/code/scrypt-jane-salsa.h b/bf/scrypt-jane/code/scrypt-jane-salsa.h new file mode 100644 index 0000000..df0a3e0 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-salsa.h @@ -0,0 +1,134 @@ +#define SCRYPT_MIX_BASE "Salsa20/8" + +typedef uint32_t scrypt_mix_word_t; + +#define SCRYPT_WORDTO8_LE U32TO8_LE +#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP + +#define SCRYPT_BLOCK_BYTES 64 +#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) + +/* must have these here in case block bytes is ever != 64 */ +#include "scrypt-jane-romix-basic.h" + +#include "scrypt-jane-mix_salsa-xop.h" +#include "scrypt-jane-mix_salsa-avx.h" +#include "scrypt-jane-mix_salsa-sse2.h" +#include "scrypt-jane-mix_salsa.h" + +#if defined(SCRYPT_SALSA_XOP) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop + #define SCRYPT_ROMIX_FN scrypt_ROMix_xop + #define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_SALSA_AVX) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx + #define SCRYPT_ROMIX_FN scrypt_ROMix_avx + #define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_SALSA_SSE2) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2 + #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2 + #define SCRYPT_MIX_FN salsa_core_sse2 + #define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" +#endif + +/* cpu agnostic */ +#define SCRYPT_ROMIX_FN scrypt_ROMix_basic +#define SCRYPT_MIX_FN salsa_core_basic +#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian +#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian +#include "scrypt-jane-romix-template.h" + +#if !defined(SCRYPT_CHOOSE_COMPILETIME) +static scrypt_ROMixfn +scrypt_getROMix(void) { + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_SALSA_XOP) + if (cpuflags & cpu_xop) + return scrypt_ROMix_xop; + else +#endif + +#if defined(SCRYPT_SALSA_AVX) + if (cpuflags & cpu_avx) + return scrypt_ROMix_avx; + else +#endif + +#if 
defined(SCRYPT_SALSA_SSE2) + if (cpuflags & cpu_sse2) + return scrypt_ROMix_sse2; + else +#endif + + return scrypt_ROMix_basic; +} +#endif + + +#if defined(SCRYPT_TEST_SPEED) +static size_t +available_implementations(void) { + size_t cpuflags = detect_cpu(); + size_t flags = 0; + +#if defined(SCRYPT_SALSA_XOP) + if (cpuflags & cpu_xop) + flags |= cpu_xop; +#endif + +#if defined(SCRYPT_SALSA_AVX) + if (cpuflags & cpu_avx) + flags |= cpu_avx; +#endif + +#if defined(SCRYPT_SALSA_SSE2) + if (cpuflags & cpu_sse2) + flags |= cpu_sse2; +#endif + + return flags; +} +#endif + + +static int +scrypt_test_mix(void) { + static const uint8_t expected[16] = { + 0x41,0x1f,0x2e,0xa3,0xab,0xa3,0x1a,0x34,0x87,0x1d,0x8a,0x1c,0x76,0xa0,0x27,0x66, + }; + + int ret = 1; + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_SALSA_XOP) + if (cpuflags & cpu_xop) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA_AVX) + if (cpuflags & cpu_avx) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA_SSE2) + if (cpuflags & cpu_sse2) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA_BASIC) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected); +#endif + + return ret; +} diff --git a/bf/scrypt-jane/code/scrypt-jane-salsa64.h b/bf/scrypt-jane/code/scrypt-jane-salsa64.h new file mode 100644 index 0000000..96b7813 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-salsa64.h @@ -0,0 +1,183 @@ +#define SCRYPT_MIX_BASE "Salsa64/8" + +typedef uint64_t scrypt_mix_word_t; + +#define SCRYPT_WORDTO8_LE U64TO8_LE +#define SCRYPT_WORD_ENDIAN_SWAP U64_SWAP + +#define SCRYPT_BLOCK_BYTES 128 +#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) + +/* must have these here in case block bytes is ever != 64 */ +#include "scrypt-jane-romix-basic.h" + +#include "scrypt-jane-mix_salsa64-avx2.h" +#include "scrypt-jane-mix_salsa64-xop.h" +#include "scrypt-jane-mix_salsa64-avx.h" +#include "scrypt-jane-mix_salsa64-ssse3.h" +#include "scrypt-jane-mix_salsa64-sse2.h" +#include "scrypt-jane-mix_salsa64.h" + +#if defined(SCRYPT_SALSA64_AVX2) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx2 + #define SCRYPT_ROMIX_FN scrypt_ROMix_avx2 + #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_SALSA64_XOP) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop + #define SCRYPT_ROMIX_FN scrypt_ROMix_xop + #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_SALSA64_AVX) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx + #define SCRYPT_ROMIX_FN scrypt_ROMix_avx + #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_SALSA64_SSSE3) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3 + #define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3 + #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" 
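As a rough, illustrative sketch of the dispatch pattern used by these headers (each SIMD-guarded include of scrypt-jane-romix-template.h stamps out one ChunkMix/ROMix specialisation, and scrypt_getROMix() later returns whichever one detect_cpu() allows, falling back to the portable version): the snippet below is a minimal stand-alone analogue, not upstream code — the mix_basic/mix_simd names and FLAG_SIMD value are hypothetical stand-ins for the cpu_* flags and scrypt_ROMix_* variants.

/* minimal sketch of runtime dispatch over CPU-feature-specialised functions;
   all symbols here are illustrative stand-ins, not scrypt-jane identifiers */
#include <stdio.h>

typedef void (*mixfn)(unsigned long long state[16]);

static void mix_basic(unsigned long long state[16]) { (void)state; /* portable fallback path */ }
static void mix_simd (unsigned long long state[16]) { (void)state; /* e.g. an SSSE3/AVX path  */ }

enum { FLAG_SIMD = 1 << 0 };            /* stand-in for cpu_ssse3, cpu_avx, ... */

static unsigned detect_flags(void) {    /* stand-in for detect_cpu() */
    return FLAG_SIMD;
}

static mixfn get_mix(void) {
    unsigned flags = detect_flags();
    if (flags & FLAG_SIMD)
        return mix_simd;                /* best implementation the CPU supports */
    return mix_basic;                   /* always-correct portable version */
}

int main(void) {
    unsigned long long st[16] = {0};
    get_mix()(st);                      /* callers never name a specific variant */
    printf("dispatched ok\n");
    return 0;
}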
+#endif + +#if defined(SCRYPT_SALSA64_SSE2) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2 + #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2 + #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" +#endif + +/* cpu agnostic */ +#define SCRYPT_ROMIX_FN scrypt_ROMix_basic +#define SCRYPT_MIX_FN salsa64_core_basic +#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian +#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian +#include "scrypt-jane-romix-template.h" + +#if !defined(SCRYPT_CHOOSE_COMPILETIME) +static scrypt_ROMixfn +scrypt_getROMix(void) { + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_SALSA64_AVX2) + if (cpuflags & cpu_avx2) + return scrypt_ROMix_avx2; + else +#endif + +#if defined(SCRYPT_SALSA64_XOP) + if (cpuflags & cpu_xop) + return scrypt_ROMix_xop; + else +#endif + +#if defined(SCRYPT_SALSA64_AVX) + if (cpuflags & cpu_avx) + return scrypt_ROMix_avx; + else +#endif + +#if defined(SCRYPT_SALSA64_SSSE3) + if (cpuflags & cpu_ssse3) + return scrypt_ROMix_ssse3; + else +#endif + +#if defined(SCRYPT_SALSA64_SSE2) + if (cpuflags & cpu_sse2) + return scrypt_ROMix_sse2; + else +#endif + + return scrypt_ROMix_basic; +} +#endif + + +#if defined(SCRYPT_TEST_SPEED) +static size_t +available_implementations(void) { + size_t cpuflags = detect_cpu(); + size_t flags = 0; + +#if defined(SCRYPT_SALSA64_AVX2) + if (cpuflags & cpu_avx2) + flags |= cpu_avx2; +#endif + +#if defined(SCRYPT_SALSA64_XOP) + if (cpuflags & cpu_xop) + flags |= cpu_xop; +#endif + +#if defined(SCRYPT_SALSA64_AVX) + if (cpuflags & cpu_avx) + flags |= cpu_avx; +#endif + +#if defined(SCRYPT_SALSA64_SSSE3) + if (cpuflags & cpu_ssse3) + flags |= cpu_ssse3; +#endif + +#if defined(SCRYPT_SALSA64_SSE2) + if (cpuflags & cpu_sse2) + flags |= cpu_sse2; +#endif + + return flags; +} +#endif + +static int +scrypt_test_mix(void) { + static const uint8_t expected[16] = { + 0xf8,0x92,0x9b,0xf8,0xcc,0x1d,0xce,0x2e,0x13,0x82,0xac,0x96,0xb2,0x6c,0xee,0x2c, + }; + + int ret = 1; + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_SALSA64_AVX2) + if (cpuflags & cpu_avx2) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA64_XOP) + if (cpuflags & cpu_xop) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA64_AVX) + if (cpuflags & cpu_avx) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA64_SSSE3) + if (cpuflags & cpu_ssse3) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA64_SSE2) + if (cpuflags & cpu_sse2) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA64_BASIC) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected); +#endif + + return ret; +} + diff --git a/bf/scrypt-jane/code/scrypt-jane-test-vectors.h b/bf/scrypt-jane/code/scrypt-jane-test-vectors.h new file mode 100644 index 0000000..72a7276 --- /dev/null +++ b/bf/scrypt-jane/code/scrypt-jane-test-vectors.h @@ -0,0 +1,261 @@ +typedef struct scrypt_test_setting_t { + const char *pw, 
*salt; + uint8_t Nfactor, rfactor, pfactor; +} scrypt_test_setting; + +static const scrypt_test_setting post_settings[] = { + {"", "", 3, 0, 0}, + {"password", "NaCl", 9, 3, 4}, + {0, 0, 0, 0, 0} +}; + +#if defined(SCRYPT_SHA256) + #if defined(SCRYPT_SALSA) + /* sha256 + salsa20/8, the only 'official' test vectors! */ + static const uint8_t post_vectors[][64] = { + {0x77,0xd6,0x57,0x62,0x38,0x65,0x7b,0x20,0x3b,0x19,0xca,0x42,0xc1,0x8a,0x04,0x97, + 0xf1,0x6b,0x48,0x44,0xe3,0x07,0x4a,0xe8,0xdf,0xdf,0xfa,0x3f,0xed,0xe2,0x14,0x42, + 0xfc,0xd0,0x06,0x9d,0xed,0x09,0x48,0xf8,0x32,0x6a,0x75,0x3a,0x0f,0xc8,0x1f,0x17, + 0xe8,0xd3,0xe0,0xfb,0x2e,0x0d,0x36,0x28,0xcf,0x35,0xe2,0x0c,0x38,0xd1,0x89,0x06}, + {0xfd,0xba,0xbe,0x1c,0x9d,0x34,0x72,0x00,0x78,0x56,0xe7,0x19,0x0d,0x01,0xe9,0xfe, + 0x7c,0x6a,0xd7,0xcb,0xc8,0x23,0x78,0x30,0xe7,0x73,0x76,0x63,0x4b,0x37,0x31,0x62, + 0x2e,0xaf,0x30,0xd9,0x2e,0x22,0xa3,0x88,0x6f,0xf1,0x09,0x27,0x9d,0x98,0x30,0xda, + 0xc7,0x27,0xaf,0xb9,0x4a,0x83,0xee,0x6d,0x83,0x60,0xcb,0xdf,0xa2,0xcc,0x06,0x40} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0xef,0x8f,0x44,0x8f,0xc3,0xef,0x78,0x13,0xb2,0x26,0xa7,0x2a,0x40,0xa1,0x98,0x7f, + 0xc8,0x7f,0x0d,0x5f,0x40,0x66,0xa2,0x05,0x07,0x4f,0xc7,0xac,0x3b,0x47,0x07,0x0c, + 0xf5,0x20,0x46,0x76,0x20,0x7b,0xee,0x51,0x6d,0x5f,0xfa,0x9c,0x27,0xac,0xa9,0x36, + 0x62,0xbd,0xde,0x0b,0xa3,0xc0,0x66,0x84,0xde,0x82,0xd0,0x1a,0xb4,0xd1,0xb5,0xfe}, + {0xf1,0x94,0xf7,0x5f,0x15,0x12,0x10,0x4d,0x6e,0xfb,0x04,0x8c,0x35,0xc4,0x51,0xb6, + 0x11,0x04,0xa7,0x9b,0xb0,0x46,0xaf,0x7b,0x47,0x39,0xf0,0xac,0xb2,0x8a,0xfa,0x45, + 0x09,0x86,0x8f,0x10,0x4b,0xc6,0xee,0x00,0x11,0x38,0x73,0x7a,0x6a,0xd8,0x25,0x67, + 0x85,0xa4,0x10,0x4e,0xa9,0x2f,0x15,0xfe,0xcf,0x63,0xe1,0xe8,0xcf,0xab,0xe8,0xbd} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xf4,0x87,0x29,0xf4,0xc3,0x31,0x8c,0xe8,0xdf,0xe5,0xd8,0x73,0xff,0xca,0x32,0xcf, + 0xd8,0xac,0xe7,0xf7,0x15,0xda,0x84,0x41,0x60,0x23,0x26,0x4a,0xc8,0x3e,0xee,0xa6, + 0xa5,0x6e,0x52,0xd6,0x64,0x55,0x16,0x31,0x3e,0x66,0x7b,0x65,0xd5,0xe2,0xc9,0x95, + 0x1b,0xf0,0x81,0x40,0xb7,0x2f,0xff,0xa6,0xe6,0x02,0xcc,0x63,0x08,0x4a,0x74,0x31}, + {0x7a,0xd8,0xad,0x02,0x9c,0xa5,0xf4,0x42,0x6a,0x29,0xd2,0xb5,0x53,0xf1,0x6d,0x1d, + 0x25,0xc8,0x70,0x48,0x80,0xb9,0xa3,0xf6,0x94,0xf8,0xfa,0xb8,0x52,0x42,0xcd,0x14, + 0x26,0x46,0x28,0x06,0xc7,0xf6,0x1f,0xa7,0x89,0x6d,0xc5,0xa0,0x36,0xcc,0xde,0xcb, + 0x73,0x0b,0xa4,0xe2,0xd3,0xd1,0x44,0x06,0x35,0x08,0xe0,0x35,0x5b,0xf8,0xd7,0xe7} + }; + #endif +#elif defined(SCRYPT_SHA512) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0xae,0x54,0xe7,0x74,0xe4,0x51,0x6b,0x0f,0xe1,0xe7,0x28,0x03,0x17,0xe4,0x8c,0xfa, + 0x2f,0x66,0x55,0x7f,0xdc,0x3b,0x40,0xab,0x47,0x84,0xc9,0x63,0x36,0x07,0x9d,0xe5, + 0x86,0x43,0x95,0x89,0xb6,0xc0,0x6c,0x72,0x64,0x00,0xc1,0x2a,0xd7,0x69,0x21,0x92, + 0x8e,0xba,0xa4,0x59,0x9f,0x00,0x14,0x3a,0x7c,0x12,0x58,0x91,0x09,0xa0,0x32,0xfe}, + {0xc5,0xb3,0xd6,0xea,0x0a,0x4b,0x1e,0xcc,0x40,0x00,0xe5,0x98,0x5c,0xdc,0x06,0x06, + 0x78,0x34,0x92,0x16,0xcf,0xe4,0x9f,0x03,0x96,0x2d,0x41,0x35,0x00,0x9b,0xff,0x74, + 0x60,0x19,0x6e,0xe6,0xa6,0x46,0xf7,0x37,0xcb,0xfa,0xd0,0x9f,0x80,0x72,0x2e,0x85, + 0x13,0x3e,0x1a,0x91,0x90,0x53,0xa1,0x33,0x85,0x51,0xdc,0x62,0x1c,0x0e,0x4d,0x30} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0xe2,0x05,0x7c,0x44,0xf9,0x55,0x9f,0x64,0xbe,0xd5,0x7f,0x85,0x69,0xc7,0x8c,0x7f, + 
0x2b,0x91,0xd6,0x9a,0x6c,0xf8,0x57,0x55,0x61,0x25,0x3d,0xee,0xb8,0xd5,0x8c,0xdc, + 0x2d,0xd5,0x53,0x84,0x8c,0x06,0xaa,0x37,0x77,0xa6,0xf0,0xf1,0x35,0xfe,0xb5,0xcb, + 0x61,0xd7,0x2c,0x67,0xf3,0x7e,0x8a,0x1b,0x04,0xa3,0xa3,0x43,0xa2,0xb2,0x29,0xf2}, + {0x82,0xda,0x29,0xb2,0x08,0x27,0xfc,0x78,0x22,0xc4,0xb8,0x7e,0xbc,0x36,0xcf,0xcd, + 0x17,0x4b,0xa1,0x30,0x16,0x4a,0x25,0x70,0xc7,0xcb,0xe0,0x2b,0x56,0xd3,0x16,0x4e, + 0x85,0xb6,0x84,0xe7,0x9b,0x7f,0x8b,0xb5,0x94,0x33,0xcf,0x33,0x44,0x65,0xc8,0xa1, + 0x46,0xf9,0xf5,0xfc,0x74,0x29,0x7e,0xd5,0x46,0xec,0xbd,0x95,0xc1,0x80,0x24,0xe4} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xa6,0xcb,0x77,0x9a,0x64,0x1f,0x95,0x02,0x53,0xe7,0x5c,0x78,0xdb,0xa3,0x43,0xff, + 0xbe,0x10,0x4c,0x7b,0xe4,0xe1,0x91,0xcf,0x67,0x69,0x5a,0x2c,0x12,0xd6,0x99,0x49, + 0x92,0xfd,0x5a,0xaa,0x12,0x4c,0x2e,0xf6,0x95,0x46,0x8f,0x5e,0x77,0x62,0x16,0x29, + 0xdb,0xe7,0xab,0x02,0x2b,0x9c,0x35,0x03,0xf8,0xd4,0x04,0x7d,0x2d,0x73,0x85,0xf1}, + {0x54,0xb7,0xca,0xbb,0xaf,0x0f,0xb0,0x5f,0xb7,0x10,0x63,0x48,0xb3,0x15,0xd8,0xb5, + 0x62,0x64,0x89,0x6a,0x59,0xc6,0x0f,0x86,0x96,0x38,0xf0,0xcf,0xd4,0x62,0x90,0x61, + 0x7d,0xce,0xd6,0x13,0x85,0x67,0x4a,0xf5,0x32,0x03,0x74,0x30,0x0b,0x5a,0x2f,0x86, + 0x82,0x6e,0x0c,0x3e,0x40,0x7a,0xde,0xbe,0x42,0x6e,0x80,0x2b,0xaf,0xdb,0xcc,0x94} + }; + #endif +#elif defined(SCRYPT_BLAKE512) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0x4a,0x48,0xb3,0xfa,0xdc,0xb0,0xb8,0xdb,0x54,0xee,0xf3,0x5c,0x27,0x65,0x6c,0x20, + 0xab,0x61,0x9a,0x5b,0xd5,0x1d,0xd9,0x95,0xab,0x88,0x0e,0x4d,0x1e,0x71,0x2f,0x11, + 0x43,0x2e,0xef,0x23,0xca,0x8a,0x49,0x3b,0x11,0x38,0xa5,0x28,0x61,0x2f,0xb7,0x89, + 0x5d,0xef,0x42,0x4c,0xc1,0x74,0xea,0x8a,0x56,0xbe,0x4a,0x82,0x76,0x15,0x1a,0x87}, + {0x96,0x24,0xbf,0x40,0xeb,0x03,0x8e,0xfe,0xc0,0xd5,0xa4,0x81,0x85,0x7b,0x09,0x88, + 0x52,0xb5,0xcb,0xc4,0x48,0xe1,0xb9,0x1d,0x3f,0x8b,0x3a,0xc6,0x38,0x32,0xc7,0x55, + 0x30,0x28,0x7a,0x42,0xa9,0x5d,0x54,0x33,0x62,0xf3,0xd9,0x3c,0x96,0x40,0xd1,0x80, + 0xe4,0x0e,0x7e,0xf0,0x64,0x53,0xfe,0x7b,0xd7,0x15,0xba,0xad,0x16,0x80,0x01,0xb5} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0x45,0x42,0x22,0x31,0x26,0x13,0x5f,0x94,0xa4,0x00,0x04,0x47,0xe8,0x50,0x6d,0xd6, + 0xdd,0xd5,0x08,0xd4,0x90,0x64,0xe0,0x59,0x70,0x46,0xff,0xfc,0x29,0xb3,0x6a,0xc9, + 0x4d,0x45,0x97,0x95,0xa8,0xf0,0x53,0xe7,0xee,0x4b,0x6b,0x5d,0x1e,0xa5,0xb2,0x58, + 0x4b,0x93,0xc9,0x89,0x4c,0xa8,0xab,0x03,0x74,0x38,0xbd,0x54,0x97,0x6b,0xab,0x4a}, + {0x4b,0x4a,0x63,0x96,0x73,0x34,0x9f,0x39,0x64,0x51,0x0e,0x2e,0x3b,0x07,0xd5,0x1c, + 0xd2,0xf7,0xce,0x60,0xab,0xac,0x89,0xa4,0x16,0x0c,0x58,0x82,0xb3,0xd3,0x25,0x5b, + 0xd5,0x62,0x32,0xf4,0x86,0x5d,0xb2,0x4b,0xbf,0x8e,0xc6,0xc0,0xac,0x40,0x48,0xb4, + 0x69,0x08,0xba,0x40,0x4b,0x07,0x2a,0x13,0x9c,0x98,0x3b,0x8b,0x20,0x0c,0xac,0x9e} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xcb,0x4b,0xc2,0xd1,0xf4,0x77,0x32,0x3c,0x42,0x9d,0xf7,0x7d,0x1f,0x22,0x64,0xa4, + 0xe2,0x88,0x30,0x2d,0x54,0x9d,0xb6,0x26,0x89,0x25,0x30,0xc3,0x3d,0xdb,0xba,0x99, + 0xe9,0x8e,0x1e,0x5e,0x57,0x66,0x75,0x7c,0x24,0xda,0x00,0x6f,0x79,0xf7,0x47,0xf5, + 0xea,0x40,0x70,0x37,0xd2,0x91,0xc7,0x4d,0xdf,0x46,0xb6,0x3e,0x95,0x7d,0xcb,0xc1}, + {0x25,0xc2,0xcb,0x7f,0xc8,0x50,0xb7,0x0b,0x11,0x9e,0x1d,0x10,0xb2,0xa8,0x35,0x23, + 0x91,0x39,0xfb,0x45,0xf2,0xbf,0xe4,0xd0,0x84,0xec,0x72,0x33,0x6d,0x09,0xed,0x41, + 0x9a,0x7e,0x4f,0x10,0x73,0x97,0x22,0x76,0x58,0x93,0x39,0x24,0xdf,0xd2,0xaa,0x2f, + 
0x6b,0x2b,0x64,0x48,0xa5,0xb7,0xf5,0x56,0x77,0x02,0xa7,0x71,0x46,0xe5,0x0e,0x8d}, + }; + #endif +#elif defined(SCRYPT_BLAKE256) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0xf1,0xf1,0x91,0x1a,0x81,0xe6,0x9f,0xc1,0xce,0x43,0xab,0xb1,0x1a,0x02,0x1e,0x16, + 0x08,0xc6,0xf9,0x00,0x50,0x1b,0x6d,0xf1,0x31,0x06,0x95,0x48,0x5d,0xf7,0x6c,0x00, + 0xa2,0x4c,0xb1,0x0e,0x52,0x66,0x94,0x7e,0x84,0xfc,0xa5,0x34,0xfd,0xf0,0xe9,0x57, + 0x85,0x2d,0x8c,0x05,0x5c,0x0f,0x04,0xd4,0x8d,0x3e,0x13,0x52,0x3d,0x90,0x2d,0x2c}, + {0xd5,0x42,0xd2,0x7b,0x06,0xae,0x63,0x90,0x9e,0x30,0x00,0x0e,0xd8,0xa4,0x3a,0x0b, + 0xee,0x4a,0xef,0xb2,0xc4,0x95,0x0d,0x72,0x07,0x70,0xcc,0xa3,0xf9,0x1e,0xc2,0x75, + 0xcf,0xaf,0xe1,0x44,0x1c,0x8c,0xe2,0x3e,0x0c,0x81,0xf3,0x92,0xe1,0x13,0xe6,0x4f, + 0x2d,0x27,0xc3,0x87,0xe5,0xb6,0xf9,0xd7,0x02,0x04,0x37,0x64,0x78,0x36,0x6e,0xb3} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0xad,0x1b,0x4b,0xca,0xe3,0x26,0x1a,0xfd,0xb7,0x77,0x8c,0xde,0x8d,0x26,0x14,0xe1, + 0x54,0x38,0x42,0xf3,0xb3,0x66,0x29,0xf9,0x90,0x04,0xf1,0x82,0x7c,0x5a,0x6f,0xa8, + 0x7d,0xd6,0x08,0x0d,0x8b,0x78,0x04,0xad,0x31,0xea,0xd4,0x87,0x2d,0xf7,0x74,0x9a, + 0xe5,0xce,0x97,0xef,0xa3,0xbb,0x90,0x46,0x7c,0xf4,0x51,0x38,0xc7,0x60,0x53,0x21}, + {0x39,0xbb,0x56,0x3d,0x0d,0x7b,0x74,0x82,0xfe,0x5a,0x78,0x3d,0x66,0xe8,0x3a,0xdf, + 0x51,0x6f,0x3e,0xf4,0x86,0x20,0x8d,0xe1,0x81,0x22,0x02,0xf7,0x0d,0xb5,0x1a,0x0f, + 0xfc,0x59,0xb6,0x60,0xc9,0xdb,0x38,0x0b,0x5b,0x95,0xa5,0x94,0xda,0x42,0x2d,0x90, + 0x47,0xeb,0x73,0x31,0x9f,0x20,0xf6,0x81,0xc2,0xef,0x33,0x77,0x51,0xd8,0x2c,0xe4} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0x9e,0xf2,0x60,0x7c,0xbd,0x7c,0x19,0x5c,0x79,0xc6,0x1b,0x7e,0xb0,0x65,0x1b,0xc3, + 0x70,0x0d,0x89,0xfc,0x72,0xb2,0x03,0x72,0x15,0xcb,0x8e,0x8c,0x49,0x50,0x4c,0x27, + 0x99,0xda,0x47,0x32,0x5e,0xb4,0xa2,0x07,0x83,0x51,0x6b,0x06,0x37,0x60,0x42,0xc4, + 0x59,0x49,0x99,0xdd,0xc0,0xd2,0x08,0x94,0x7f,0xe3,0x9e,0x4e,0x43,0x8e,0x5b,0xba}, + {0x86,0x6f,0x3b,0x11,0xb8,0xca,0x4b,0x6e,0xa7,0x6f,0xc2,0xc9,0x33,0xb7,0x8b,0x9f, + 0xa3,0xb9,0xf5,0xb5,0x62,0xa6,0x17,0x66,0xe4,0xc3,0x9d,0x9b,0xca,0x51,0xb0,0x2f, + 0xda,0x09,0xc1,0x77,0xed,0x8b,0x89,0xc2,0x69,0x5a,0x34,0x05,0x4a,0x1f,0x4d,0x76, + 0xcb,0xd5,0xa4,0x78,0xfa,0x1b,0xb9,0x5b,0xbc,0x3d,0xce,0x04,0x63,0x99,0xad,0x54} + }; + #endif +#elif defined(SCRYPT_SKEIN512) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0xe4,0x36,0xa0,0x9a,0xdb,0xf0,0xd1,0x45,0x56,0xda,0x25,0x53,0x00,0xf9,0x2c,0x69, + 0xa4,0xc2,0xa5,0x8e,0x1a,0x85,0xfa,0x53,0xbd,0x55,0x3d,0x11,0x2a,0x44,0x13,0x87, + 0x8f,0x81,0x88,0x13,0x1e,0x49,0xa8,0xc4,0xc5,0xcd,0x1f,0xe1,0x5f,0xf5,0xcb,0x2f, + 0x8b,0xab,0x57,0x38,0x59,0xeb,0x6b,0xac,0x3b,0x73,0x10,0xa6,0xe1,0xfe,0x17,0x3e}, + {0x6d,0x61,0xde,0x43,0xa9,0x38,0x53,0x5f,0xd8,0xf2,0x6d,0xf3,0xe4,0xd6,0xd8,0x5e, + 0x81,0x89,0xd0,0x0b,0x86,0x16,0xb1,0x91,0x65,0x76,0xd8,0xc1,0xf7,0x3b,0xca,0x8b, + 0x35,0x07,0x58,0xba,0x77,0xdf,0x11,0x6c,0xbc,0x58,0xee,0x11,0x59,0xf2,0xfe,0xcb, + 0x51,0xdc,0xcd,0x35,0x2e,0x46,0x22,0xa0,0xaa,0x55,0x60,0x7c,0x91,0x15,0xb8,0x00} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0xd1,0x12,0x6d,0x64,0x10,0x0e,0x98,0x6c,0xbe,0x70,0x21,0xd9,0xc6,0x04,0x62,0xa4, + 0x29,0x13,0x9a,0x3c,0xf8,0xe9,0x1e,0x87,0x9f,0x88,0xf4,0x98,0x01,0x41,0x8e,0xce, + 0x60,0xf7,0xbe,0x17,0x0a,0xec,0xd6,0x30,0x80,0xcf,0x6b,0x1e,0xcf,0x95,0xa0,0x4d, + 
0x37,0xed,0x3a,0x09,0xd1,0xeb,0x0c,0x80,0x82,0x22,0x8e,0xd3,0xb1,0x7f,0xd6,0xa8}, + {0x5c,0x5c,0x05,0xe2,0x75,0xa5,0xa4,0xec,0x81,0x97,0x9c,0x5b,0xd7,0x26,0xb3,0x16, + 0xb4,0x02,0x8c,0x56,0xe6,0x32,0x57,0x33,0x47,0x19,0x06,0x6c,0xde,0x68,0x41,0x37, + 0x5b,0x7d,0xa7,0xb3,0x73,0xeb,0x82,0xca,0x0f,0x86,0x2e,0x6b,0x47,0xa2,0x70,0x39, + 0x35,0xfd,0x2d,0x2e,0x7b,0xc3,0x68,0xbb,0x52,0x42,0x19,0x3b,0x78,0x96,0xe7,0xc8} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xd2,0xad,0x32,0x05,0xee,0x80,0xe3,0x44,0x70,0xc6,0x34,0xde,0x05,0xb6,0xcf,0x60, + 0x89,0x98,0x70,0xc0,0xb8,0xf5,0x54,0xf1,0xa6,0xb2,0xc8,0x76,0x34,0xec,0xc4,0x59, + 0x8e,0x64,0x42,0xd0,0xa9,0xed,0xe7,0x19,0xb2,0x8a,0x11,0xc6,0xa6,0xbf,0xa7,0xa9, + 0x4e,0x44,0x32,0x7e,0x12,0x91,0x9d,0xfe,0x52,0x48,0xa8,0x27,0xb3,0xfc,0xb1,0x89}, + {0xd6,0x67,0xd2,0x3e,0x30,0x1e,0x9d,0xe2,0x55,0x68,0x17,0x3d,0x2b,0x75,0x5a,0xe5, + 0x04,0xfb,0x3d,0x0e,0x86,0xe0,0xaa,0x1d,0xd4,0x72,0xda,0xb0,0x79,0x41,0xb7,0x99, + 0x68,0xe5,0xd9,0x55,0x79,0x7d,0xc3,0xd1,0xa6,0x56,0xc1,0xbe,0x0b,0x6c,0x62,0x23, + 0x66,0x67,0x91,0x47,0x99,0x13,0x6b,0xe3,0xda,0x59,0x55,0x18,0x67,0x8f,0x2e,0x3b} + }; + #endif +#elif defined(SCRYPT_KECCAK512) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0xc2,0x7b,0xbe,0x1d,0xf1,0x99,0xd8,0xe7,0x1b,0xac,0xe0,0x9d,0xeb,0x5a,0xfe,0x21, + 0x71,0xff,0x41,0x51,0x4f,0xbe,0x41,0x01,0x15,0xe2,0xb7,0xb9,0x55,0x15,0x25,0xa1, + 0x40,0x4c,0x66,0x29,0x32,0xb7,0xc9,0x62,0x60,0x88,0xe0,0x99,0x39,0xae,0xce,0x25, + 0x3c,0x11,0x89,0xdd,0xc6,0x14,0xd7,0x3e,0xa3,0x6d,0x07,0x2e,0x56,0xa0,0xff,0x97}, + {0x3c,0x91,0x12,0x4a,0x37,0x7d,0xd6,0x96,0xd2,0x9b,0x5d,0xea,0xb8,0xb9,0x82,0x4e, + 0x4f,0x6b,0x60,0x4c,0x59,0x01,0xe5,0x73,0xfd,0xf6,0xb8,0x9a,0x5a,0xd3,0x7c,0x7a, + 0xd2,0x4f,0x8e,0x74,0xc1,0x90,0x88,0xa0,0x3f,0x55,0x75,0x79,0x10,0xd0,0x09,0x79, + 0x0f,0x6c,0x74,0x0c,0x05,0x08,0x3c,0x8c,0x94,0x7b,0x30,0x56,0xca,0xdf,0xdf,0x34} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0x77,0xcb,0x70,0xbf,0xae,0xd4,0x4c,0x5b,0xbc,0xd3,0xec,0x8a,0x82,0x43,0x8d,0xb3, + 0x7f,0x1f,0xfb,0x70,0x36,0x32,0x4d,0xa6,0xb7,0x13,0x37,0x77,0x30,0x0c,0x3c,0xfb, + 0x2c,0x20,0x8f,0x2a,0xf4,0x47,0x4d,0x69,0x8e,0xae,0x2d,0xad,0xba,0x35,0xe9,0x2f, + 0xe6,0x99,0x7a,0xf8,0xcf,0x70,0x78,0xbb,0x0c,0x72,0x64,0x95,0x8b,0x36,0x77,0x3d}, + {0xc6,0x43,0x17,0x16,0x87,0x09,0x5f,0x12,0xed,0x21,0xe2,0xb4,0xad,0x55,0xa1,0xa1, + 0x49,0x50,0x90,0x70,0xab,0x81,0x83,0x7a,0xcd,0xdf,0x23,0x52,0x19,0xc0,0xa2,0xd8, + 0x8e,0x98,0xeb,0xf0,0x37,0xab,0xad,0xfd,0x1c,0x04,0x97,0x18,0x42,0x85,0xf7,0x4b, + 0x18,0x2c,0x55,0xd3,0xa9,0xe6,0x89,0xfb,0x58,0x0a,0xb2,0x37,0xb9,0xf8,0xfb,0xc5} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xc7,0x34,0x95,0x02,0x5e,0x31,0x0d,0x1f,0x10,0x38,0x9c,0x3f,0x04,0x53,0xed,0x05, + 0x27,0x38,0xc1,0x3f,0x6a,0x0f,0xc5,0xa3,0x9b,0x73,0x8a,0x28,0x7e,0x5d,0x3c,0xdc, + 0x9d,0x5a,0x09,0xbf,0x8c,0x0a,0xad,0xe4,0x73,0x52,0xe3,0x6d,0xaa,0xd1,0x8b,0xbf, + 0xa3,0xb7,0xf0,0x58,0xad,0x22,0x24,0xc9,0xaa,0x96,0xb7,0x5d,0xfc,0x5f,0xb0,0xcf}, + {0x76,0x22,0xfd,0xe8,0xa2,0x79,0x8e,0x9d,0x43,0x8c,0x7a,0xba,0x78,0xb7,0x84,0xf1, + 0xc8,0xee,0x3b,0xae,0x31,0x89,0xbf,0x7e,0xd0,0x4b,0xc1,0x2d,0x58,0x5d,0x84,0x6b, + 0xec,0x86,0x56,0xe0,0x87,0x94,0x7f,0xbc,0xf9,0x48,0x92,0xef,0x54,0x7f,0x23,0x8d, + 0x4f,0x8b,0x0a,0x75,0xa7,0x39,0x0e,0x46,0x6e,0xee,0x58,0xc8,0xfa,0xea,0x90,0x53} + }; + #endif +#elif defined(SCRYPT_KECCAK256) + #if defined(SCRYPT_SALSA) + static const uint8_t 
post_vectors[][64] = { + {0x2e,0x96,0xd8,0x87,0x45,0xcd,0xd6,0xc8,0xf6,0xd2,0x87,0x33,0x50,0xc7,0x04,0xe5, + 0x3c,0x4b,0x48,0x44,0x57,0xc1,0x74,0x09,0x76,0x02,0xaa,0xd3,0x7b,0xf3,0xbf,0xed, + 0x4b,0x72,0xd7,0x1b,0x49,0x6b,0xe0,0x44,0x83,0xee,0x8f,0xaf,0xa1,0xb5,0x33,0xa9, + 0x9e,0x86,0xab,0xe2,0x9f,0xcf,0x68,0x6e,0x7e,0xbd,0xf5,0x7a,0x83,0x4b,0x1c,0x10}, + {0x42,0x7e,0xf9,0x4b,0x72,0x61,0xda,0x2d,0xb3,0x27,0x0e,0xe1,0xd9,0xde,0x5f,0x3e, + 0x64,0x2f,0xd6,0xda,0x90,0x59,0xce,0xbf,0x02,0x5b,0x32,0xf7,0x6d,0x94,0x51,0x7b, + 0xb6,0xa6,0x0d,0x99,0x3e,0x7f,0x39,0xbe,0x1b,0x1d,0x6c,0x97,0x12,0xd8,0xb7,0xfd, + 0x5b,0xb5,0xf3,0x73,0x5a,0x89,0xb2,0xdd,0xcc,0x3d,0x74,0x2e,0x3d,0x9e,0x3c,0x22} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0x76,0x1d,0x5b,0x8f,0xa9,0xe1,0xa6,0x01,0xcb,0xc5,0x7a,0x5f,0x02,0x23,0xb6,0x82, + 0x57,0x79,0x60,0x2f,0x05,0x7f,0xb8,0x0a,0xcb,0x5e,0x54,0x11,0x49,0x2e,0xdd,0x85, + 0x83,0x30,0x67,0xb3,0x24,0x5c,0xce,0xfc,0x32,0xcf,0x12,0xc3,0xff,0xe0,0x79,0x36, + 0x74,0x17,0xa6,0x3e,0xcd,0xa0,0x7e,0xcb,0x37,0xeb,0xcb,0xb6,0xe1,0xb9,0xf5,0x15}, + {0xf5,0x66,0xa7,0x4c,0xe4,0xdc,0x18,0x56,0x2f,0x3e,0x86,0x4d,0x92,0xa5,0x5c,0x5a, + 0x8f,0xc3,0x6b,0x32,0xdb,0xe5,0x72,0x50,0x84,0xfc,0x6e,0x5d,0x15,0x77,0x3d,0xca, + 0xc5,0x2b,0x20,0x3c,0x78,0x37,0x80,0x78,0x23,0x56,0x91,0xa0,0xce,0xa4,0x06,0x5a, + 0x7f,0xe3,0xbf,0xab,0x51,0x57,0x32,0x2c,0x0a,0xf0,0xc5,0x6f,0xf4,0xcb,0xff,0x42} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xb0,0xb7,0x10,0xb5,0x1f,0x2b,0x7f,0xaf,0x9d,0x95,0x5f,0x4c,0x2d,0x98,0x7c,0xc1, + 0xbc,0x37,0x2f,0x50,0x8d,0xb2,0x9f,0xfd,0x48,0x0d,0xe0,0x44,0x19,0xdf,0x28,0x6c, + 0xab,0xbf,0x1e,0x17,0x26,0xcc,0x57,0x95,0x18,0x17,0x83,0x4c,0x12,0x48,0xd9,0xee, + 0x4b,0x00,0x29,0x06,0x31,0x01,0x6b,0x8c,0x26,0x39,0xbf,0xe4,0xe4,0xd4,0x6a,0x26}, + {0xa0,0x40,0xb2,0xf2,0x11,0xb6,0x5f,0x3d,0x4c,0x1e,0xef,0x59,0xd4,0x98,0xdb,0x14, + 0x01,0xff,0xe3,0x34,0xd7,0x19,0xcd,0xeb,0xde,0x52,0x1c,0xf4,0x86,0x43,0xc9,0xe2, + 0xfb,0xf9,0x4f,0x0a,0xbb,0x1f,0x5c,0x6a,0xdf,0xb9,0x28,0xfa,0xac,0xc4,0x48,0xed, + 0xcc,0xd2,0x2e,0x25,0x5f,0xf3,0x56,0x1d,0x2d,0x23,0x22,0xc1,0xbc,0xff,0x78,0x80} + }; + #endif +#else + static const uint8_t post_vectors[][64] = {{0}}; +#endif + diff --git a/bf/scrypt-jane/example.c b/bf/scrypt-jane/example.c new file mode 100644 index 0000000..6f290a1 --- /dev/null +++ b/bf/scrypt-jane/example.c @@ -0,0 +1,13 @@ +#include +#include "scrypt-jane.h" + + +int main(void) { + unsigned char digest[16]; + int i; + scrypt("pw", 2, "salt", 4, 0, 0, 0, digest, 16); + for (i = 0; i < sizeof(digest); i++) + printf("%02x, ", digest[i]); + printf("\n"); + return 0; +} \ No newline at end of file diff --git a/bf/scrypt-jane/scrypt-jane-speed.c b/bf/scrypt-jane/scrypt-jane-speed.c new file mode 100644 index 0000000..e8d61b3 --- /dev/null +++ b/bf/scrypt-jane/scrypt-jane-speed.c @@ -0,0 +1,121 @@ +#define SCRYPT_TEST_SPEED +#include "scrypt-jane.c" + +/* ticks - not tested on anything other than x86 */ +static uint64_t +get_ticks(void) { +#if defined(CPU_X86) || defined(CPU_X86_64) + #if defined(COMPILER_INTEL) + return _rdtsc(); + #elif defined(COMPILER_MSVC) + return __rdtsc(); + #elif defined(COMPILER_GCC) + uint32_t lo, hi; + __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi)); + return ((uint64_t)lo | ((uint64_t)hi << 32)); + #else + need rdtsc for this compiler + #endif +#elif defined(OS_SOLARIS) + return (uint64_t)gethrtime(); +#elif defined(CPU_SPARC) && !defined(OS_OPENBSD) + uint64_t t; + __asm__ 
__volatile__("rd %%tick, %0" : "=r" (t)); + return t; +#elif defined(CPU_PPC) + uint32_t lo = 0, hi = 0; + __asm__ __volatile__("mftbu %0; mftb %1" : "=r" (hi), "=r" (lo)); + return ((uint64_t)lo | ((uint64_t)hi << 32)); +#elif defined(CPU_IA64) + uint64_t t; + __asm__ __volatile__("mov %0=ar.itc" : "=r" (t)); + return t; +#elif defined(OS_NIX) + timeval t2; + gettimeofday(&t2, NULL); + t = ((uint64_t)t2.tv_usec << 32) | (uint64_t)t2.tv_sec; + return t; +#else + need ticks for this platform +#endif +} + +#define timeit(x,minvar) { \ + ticks = get_ticks(); \ + x; \ + ticks = get_ticks() - ticks; \ + if (ticks < minvar) \ + minvar = ticks; \ + } + +#define maxticks 0xffffffffffffffffull + +typedef struct scrypt_speed_settings_t { + const char *desc; + uint8_t Nfactor, rfactor, pfactor; +} scrypt_speed_settings; + +/* scrypt_r_32kb is set to a 32kb chunk, so (1 << (scrypt_r_32kb - 5)) = 1kb chunk */ +static const scrypt_speed_settings settings[] = { + {"scrypt high volume ( ~4mb)", 11, scrypt_r_32kb - 5, 0}, + {"scrypt interactive (~16mb)", 13, scrypt_r_32kb - 5, 0}, + {"scrypt non-interactive (~ 1gb)", 19, scrypt_r_32kb - 5, 0}, + {0} +}; + +int main(void) { + const scrypt_speed_settings *s; + uint8_t password[64], salt[24], digest[64]; + uint64_t minticks, ticks; + size_t i, passes; + size_t cpuflags, topbit; + + for (i = 0; i < sizeof(password); i++) + password[i] = (uint8_t)i; + for (i = 0; i < sizeof(salt); i++) + salt[i] = 255 - (uint8_t)i; + + /* warm up a little */ + scrypt(password, sizeof(password), salt, sizeof(salt), 15, 3, 4, digest, sizeof(digest)); + + cpuflags = available_implementations(); + topbit = 0; + for (i = cpuflags; i != 0; i >>= 1) + topbit++; + topbit = ((size_t)1 << topbit); + + while (1) { + #if defined(SCRYPT_CHOOSE_COMPILETIME) + printf("speed test for scrypt[%s,%s]\n", SCRYPT_HASH, SCRYPT_MIX); + #else + printf("speed test for scrypt[%s,%s,%s]\n", SCRYPT_HASH, SCRYPT_MIX, get_top_cpuflag_desc(cpuflags)); + #endif + + cpu_detect_mask = cpuflags; + for (i = 0; settings[i].desc; i++) { + s = &settings[i]; + minticks = maxticks; + for (passes = 0; passes < 16; passes++) + timeit(scrypt(password, sizeof(password), salt, sizeof(salt), s->Nfactor, s->rfactor, s->pfactor, digest, sizeof(digest)), minticks) + + printf("%s, %.0f ticks\n", s->desc, (double)minticks); + } + + #if defined(SCRYPT_CHOOSE_COMPILETIME) + break; + #else + while (topbit && ((cpuflags & topbit) == 0)) + topbit >>= 1; + cpuflags &= ~topbit; + + /* (cpuflags == 0) is the basic/portable version, don't bother timing it */ + if (!cpuflags) + break; + #endif + } + + printf("\n\n"); + + return 0; +} + diff --git a/bf/scrypt-jane/scrypt-jane-test.c b/bf/scrypt-jane/scrypt-jane-test.c new file mode 100644 index 0000000..808f843 --- /dev/null +++ b/bf/scrypt-jane/scrypt-jane-test.c @@ -0,0 +1,12 @@ +#define SCRYPT_TEST +#include "scrypt-jane.c" + +int main(void) { + int res = scrypt_power_on_self_test(); + + printf("%s: test %s\n", SCRYPT_MIX, (res & 1) ? "ok" : "FAILED"); + printf("%s: test %s\n", SCRYPT_HASH, (res & 2) ? "ok" : "FAILED"); + printf("scrypt: test vectors %s\n", (res & 4) ? "ok" : "FAILED"); + + return ((res & 7) == 7) ? 
0 : 1; +} diff --git a/bf/scrypt-jane/scrypt-jane.c b/bf/scrypt-jane/scrypt-jane.c new file mode 100644 index 0000000..62eb0c6 --- /dev/null +++ b/bf/scrypt-jane/scrypt-jane.c @@ -0,0 +1,193 @@ +/* + scrypt-jane by Andrew M, https://github.com/floodyberry/scrypt-jane + + Public Domain or MIT License, whichever is easier +*/ + +#include + +#include "scrypt-jane.h" +#include "code/scrypt-jane-portable.h" +#include "code/scrypt-jane-hash.h" +#include "code/scrypt-jane-romix.h" +#include "code/scrypt-jane-test-vectors.h" + + +#define scrypt_maxNfactor 30 /* (1 << (30 + 1)) = ~2 billion */ +#if (SCRYPT_BLOCK_BYTES == 64) +#define scrypt_r_32kb 8 /* (1 << 8) = 256 * 2 blocks in a chunk * 64 bytes = Max of 32kb in a chunk */ +#elif (SCRYPT_BLOCK_BYTES == 128) +#define scrypt_r_32kb 7 /* (1 << 7) = 128 * 2 blocks in a chunk * 128 bytes = Max of 32kb in a chunk */ +#elif (SCRYPT_BLOCK_BYTES == 256) +#define scrypt_r_32kb 6 /* (1 << 6) = 64 * 2 blocks in a chunk * 256 bytes = Max of 32kb in a chunk */ +#elif (SCRYPT_BLOCK_BYTES == 512) +#define scrypt_r_32kb 5 /* (1 << 5) = 32 * 2 blocks in a chunk * 512 bytes = Max of 32kb in a chunk */ +#endif +#define scrypt_maxrfactor scrypt_r_32kb /* 32kb */ +#define scrypt_maxpfactor 25 /* (1 << 25) = ~33 million */ + +#include +//#include + +static void NORETURN +scrypt_fatal_error_default(const char *msg) { + fprintf(stderr, "%s\n", msg); + exit(1); +} + +static scrypt_fatal_errorfn scrypt_fatal_error = scrypt_fatal_error_default; + +void +scrypt_set_fatal_error(scrypt_fatal_errorfn fn) { + scrypt_fatal_error = fn; +} + +static int +scrypt_power_on_self_test(void) { + const scrypt_test_setting *t; + uint8_t test_digest[64]; + uint32_t i; + int res = 7, scrypt_valid; + + if (!scrypt_test_mix()) { +#if !defined(SCRYPT_TEST) + scrypt_fatal_error("scrypt: mix function power-on-self-test failed"); +#endif + res &= ~1; + } + + if (!scrypt_test_hash()) { +#if !defined(SCRYPT_TEST) + scrypt_fatal_error("scrypt: hash function power-on-self-test failed"); +#endif + res &= ~2; + } + + for (i = 0, scrypt_valid = 1; post_settings[i].pw; i++) { + t = post_settings + i; + scrypt((uint8_t *)t->pw, strlen(t->pw), (uint8_t *)t->salt, strlen(t->salt), t->Nfactor, t->rfactor, t->pfactor, test_digest, sizeof(test_digest)); + scrypt_valid &= scrypt_verify(post_vectors[i], test_digest, sizeof(test_digest)); + } + + if (!scrypt_valid) { +#if !defined(SCRYPT_TEST) + scrypt_fatal_error("scrypt: scrypt power-on-self-test failed"); +#endif + res &= ~4; + } + + return res; +} + +typedef struct scrypt_aligned_alloc_t { + uint8_t *mem, *ptr; +} scrypt_aligned_alloc; + +#if defined(SCRYPT_TEST_SPEED) +static uint8_t *mem_base = (uint8_t *)0; +static size_t mem_bump = 0; + +/* allocations are assumed to be multiples of 64 bytes and total allocations not to exceed ~1.01gb */ +static scrypt_aligned_alloc +scrypt_alloc(uint64_t size) { + scrypt_aligned_alloc aa; + if (!mem_base) { + mem_base = (uint8_t *)malloc((1024 * 1024 * 1024) + (1024 * 1024) + (SCRYPT_BLOCK_BYTES - 1)); + if (!mem_base) + scrypt_fatal_error("scrypt: out of memory"); + mem_base = (uint8_t *)(((size_t)mem_base + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1)); + } + aa.mem = mem_base + mem_bump; + aa.ptr = aa.mem; + mem_bump += (size_t)size; + return aa; +} + +static void +scrypt_free(scrypt_aligned_alloc *aa) { + mem_bump = 0; +} +#else +static scrypt_aligned_alloc +scrypt_alloc(uint64_t size) { + static const size_t max_alloc = (size_t)-1; + scrypt_aligned_alloc aa; + size += (SCRYPT_BLOCK_BYTES - 1); + if (size > 
max_alloc) + scrypt_fatal_error("scrypt: not enough address space on this CPU to allocate required memory"); + aa.mem = (uint8_t *)malloc((size_t)size); + aa.ptr = (uint8_t *)(((size_t)aa.mem + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1)); + //fprintf(stderr, "scrypt_alloc(%zu)\n", size); + if (!aa.mem) + scrypt_fatal_error("scrypt: out of memory"); + return aa; +} + +static void +scrypt_free(scrypt_aligned_alloc *aa) { + free(aa->mem); +} +#endif + + +void +scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint8_t Nfactor, uint8_t rfactor, uint8_t pfactor, uint8_t *out, size_t bytes) { + static scrypt_aligned_alloc YX, V; + uint8_t *X, *Y; + uint32_t N, r, p, chunk_bytes, i; + static int last_Nfactor = -1, last_rfactor = -1, last_pfactor = -1; + +#if !defined(SCRYPT_CHOOSE_COMPILETIME) + scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix(); +#endif + +#if !defined(SCRYPT_TEST) + static int power_on_self_test = 0; + if (!power_on_self_test) { + power_on_self_test = 1; + if (!scrypt_power_on_self_test()) + scrypt_fatal_error("scrypt: power on self test failed"); + } +#endif + + if (Nfactor > scrypt_maxNfactor) + scrypt_fatal_error("scrypt: N out of range"); + if (rfactor > scrypt_maxrfactor) + scrypt_fatal_error("scrypt: r out of range"); + if (pfactor > scrypt_maxpfactor) + scrypt_fatal_error("scrypt: p out of range"); + + N = (1 << (Nfactor + 1)); + r = (1 << rfactor); + p = (1 << pfactor); + + //fprintf(stderr, "scrypt(%u,%u,%u) %p %p %p %p\n", N, r, p, &V, V.mem, &YX, YX.mem); + chunk_bytes = SCRYPT_BLOCK_BYTES * r * 2; + if (Nfactor != last_Nfactor || rfactor != last_rfactor || pfactor != last_pfactor) { + //fprintf(stderr, "need to reallocate\n"); + if (last_Nfactor >= 0 || last_rfactor >= 0 || last_pfactor >= 0) { + //fprintf(stderr, "freeing old V and YX\n"); + scrypt_free(&V); + scrypt_free(&YX); + } + V = scrypt_alloc((uint64_t)N * chunk_bytes); + YX = scrypt_alloc((p + 1) * chunk_bytes); + last_Nfactor = Nfactor; + last_rfactor = rfactor; + last_pfactor = pfactor; + } + + /* 1: X = PBKDF2(password, salt) */ + Y = YX.ptr; + X = Y + chunk_bytes; + scrypt_pbkdf2(password, password_len, salt, salt_len, 1, X, chunk_bytes * p); + + /* 2: X = ROMix(X) */ + for (i = 0; i < p; i++) + scrypt_ROMix((scrypt_mix_word_t *)(X + (chunk_bytes * i)), (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V.ptr, N, r); + + /* 3: Out = PBKDF2(password, X) */ + scrypt_pbkdf2(password, password_len, X, chunk_bytes * p, 1, out, bytes); + + //scrypt_ensure_zero(YX.ptr, (p + 1) * chunk_bytes); +} diff --git a/bf/scrypt-jane/scrypt-jane.h b/bf/scrypt-jane/scrypt-jane.h new file mode 100644 index 0000000..1c0df62 --- /dev/null +++ b/bf/scrypt-jane/scrypt-jane.h @@ -0,0 +1,27 @@ +#ifndef SCRYPT_JANE_H +#define SCRYPT_JANE_H + +/* + Nfactor: Increases CPU & Memory Hardness + N = (1 << (Nfactor + 1)): How many times to mix a chunk and how many temporary chunks are used + + rfactor: Increases Memory Hardness + r = (1 << rfactor): How large a chunk is + + pfactor: Increases CPU Hardness + p = (1 << pfactor): Number of times to mix the main chunk + + A block is the basic mixing unit (salsa/chacha block = 64 bytes) + A chunk is (2 * r) blocks + + ~Memory used = (N + 2) * ((2 * r) * block size) +*/ + +#include + +typedef void (*scrypt_fatal_errorfn)(const char *msg); +void scrypt_set_fatal_error(scrypt_fatal_errorfn fn); + +void scrypt(const unsigned char *password, size_t password_len, const unsigned char *salt, size_t salt_len, unsigned char Nfactor, unsigned char rfactor, 
unsigned char pfactor, unsigned char *out, size_t bytes); + +#endif /* SCRYPT_JANE_H */ diff --git a/bf/scrypt-jane/test-speed.sh b/bf/scrypt-jane/test-speed.sh new file mode 100644 index 0000000..f223dae --- /dev/null +++ b/bf/scrypt-jane/test-speed.sh @@ -0,0 +1,38 @@ +#!/bin/sh + +test() { + sleep 0.25 # mingw is stupid and will occasionally not have permission to overwrite scrypt_speed + gcc scrypt-jane-speed.c -O3 -DSCRYPT_$1 -DSCRYPT_$2 $3 -o scrypt_speed 2>/dev/null + local RC=$? + if [ $RC -ne 0 ]; then + echo "$1/$2: failed to compile " + return + fi + ./scrypt_speed +} + +testhash() { + test $1 SALSA $2 + test $1 CHACHA $2 + test $1 SALSA64 $2 +} + +testhashes() { + testhash SHA256 $1 + testhash SHA512 $1 + testhash BLAKE256 $1 + testhash BLAKE512 $1 + testhash SKEIN512 $1 + testhash KECCAK256 $1 + testhash KECCAK512 $1 +} + +if [ -z $1 ]; then + testhashes +elif [ $1 -eq 32 ]; then + testhashes -m32 +elif [ $1 -eq 64 ]; then + testhashes -m64 +fi + +rm -f scrypt_speed \ No newline at end of file diff --git a/bf/scrypt-jane/test.sh b/bf/scrypt-jane/test.sh new file mode 100644 index 0000000..dc3d032 --- /dev/null +++ b/bf/scrypt-jane/test.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +test() { + sleep 0.25 # mingw is stupid and will occasionally not have permission to overwrite scrypt_test + gcc scrypt-jane-test.c -O3 -DSCRYPT_$1 -DSCRYPT_$2 $3 -o scrypt_test 2>/dev/null + local RC=$? + if [ $RC -ne 0 ]; then + echo "$1/$2: failed to compile " + return + fi + ./scrypt_test >/dev/null + local RC=$? + if [ $RC -ne 0 ]; then + echo "$1/$2: validation failed" + return + fi + echo "$1/$2: OK" +} + +testhash() { + test $1 SALSA $2 + test $1 CHACHA $2 + test $1 SALSA64 $2 +} + +testhashes() { + testhash SHA256 $1 + testhash SHA512 $1 + testhash BLAKE256 $1 + testhash BLAKE512 $1 + testhash SKEIN512 $1 + testhash KECCAK256 $1 + testhash KECCAK512 $1 +} + +if [ -z $1 ]; then + testhashes +elif [ $1 -eq 32 ]; then + testhashes -m32 +elif [ $1 -eq 64 ]; then + testhashes -m64 +fi + +rm -f scrypt_test diff --git a/bf/secp256k1/.gitignore b/bf/secp256k1/.gitignore new file mode 100644 index 0000000..e0b7b7a --- /dev/null +++ b/bf/secp256k1/.gitignore @@ -0,0 +1,41 @@ +bench_inv +bench_ecdh +bench_sign +bench_verify +bench_schnorr_verify +bench_recover +bench_internal +tests +gen_context +*.exe +*.so +*.a +!.gitignore + +Makefile +configure +.libs/ +Makefile.in +aclocal.m4 +autom4te.cache/ +config.log +config.status +*.tar.gz +*.la +libtool +.deps/ +.dirstamp +build-aux/ +*.lo +*.o +*~ +src/libsecp256k1-config.h +src/libsecp256k1-config.h.in +src/ecmult_static_context.h +m4/libtool.m4 +m4/ltoptions.m4 +m4/ltsugar.m4 +m4/ltversion.m4 +m4/lt~obsolete.m4 +src/stamp-h1 +libsecp256k1.pc diff --git a/bf/secp256k1/.travis.yml b/bf/secp256k1/.travis.yml new file mode 100644 index 0000000..0e1c8f9 --- /dev/null +++ b/bf/secp256k1/.travis.yml @@ -0,0 +1,62 @@ +language: c +sudo: false +addons: + apt: + packages: libgmp-dev +compiler: + - clang + - gcc +env: + global: + - FIELD=auto BIGNUM=auto SCALAR=auto ENDOMORPHISM=no STATICPRECOMPUTATION=yes ASM=no BUILD=check EXTRAFLAGS= HOST= ECDH=no schnorr=NO + matrix: + - SCALAR=32bit + - SCALAR=32bit FIELD=32bit ECDH=yes + - SCALAR=64bit + - FIELD=64bit + - FIELD=64bit ENDOMORPHISM=yes + - FIELD=64bit ENDOMORPHISM=yes ECDH=yes + - FIELD=64bit ASM=x86_64 + - FIELD=64bit ENDOMORPHISM=yes ASM=x86_64 + - FIELD=32bit SCHNORR=yes + - FIELD=32bit ENDOMORPHISM=yes + - BIGNUM=no + - BIGNUM=no ENDOMORPHISM=yes SCHNORR=yes + - BIGNUM=no STATICPRECOMPUTATION=no + - BUILD=distcheck + - 
EXTRAFLAGS=CFLAGS=-DDETERMINISTIC +matrix: + fast_finish: true + include: + - compiler: clang + env: HOST=i686-linux-gnu ENDOMORPHISM=yes + addons: + apt: + packages: + - gcc-multilib + - libgmp-dev:i386 + - compiler: clang + env: HOST=i686-linux-gnu + addons: + apt: + packages: + - gcc-multilib + - compiler: gcc + env: HOST=i686-linux-gnu ENDOMORPHISM=yes + addons: + apt: + packages: + - gcc-multilib + - compiler: gcc + env: HOST=i686-linux-gnu + addons: + apt: + packages: + - gcc-multilib + - libgmp-dev:i386 +before_script: ./autogen.sh +script: + - if [ -n "$HOST" ]; then export USE_HOST="--host=$HOST"; fi + - if [ "x$HOST" = "xi686-linux-gnu" ]; then export CC="$CC -m32"; fi + - ./configure --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR --enable-ecmult-static-precomputation=$STATICPRECOMPUTATION --enable-module-ecdh=$ECDH --enable-module-schnorr=$SCHNORR $EXTRAFLAGS $USE_HOST && make -j2 $BUILD +os: linux diff --git a/bf/secp256k1/COPYING b/bf/secp256k1/COPYING new file mode 100644 index 0000000..4522a59 --- /dev/null +++ b/bf/secp256k1/COPYING @@ -0,0 +1,19 @@ +Copyright (c) 2013 Pieter Wuille + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/bf/secp256k1/Makefile.am b/bf/secp256k1/Makefile.am new file mode 100644 index 0000000..060a51a --- /dev/null +++ b/bf/secp256k1/Makefile.am @@ -0,0 +1,108 @@ +ACLOCAL_AMFLAGS = -I build-aux/m4 + +lib_LTLIBRARIES = libsecp256k1.la +include_HEADERS = include/secp256k1.h +noinst_HEADERS = +noinst_HEADERS += src/scalar.h +noinst_HEADERS += src/scalar_4x64.h +noinst_HEADERS += src/scalar_8x32.h +noinst_HEADERS += src/scalar_impl.h +noinst_HEADERS += src/scalar_4x64_impl.h +noinst_HEADERS += src/scalar_8x32_impl.h +noinst_HEADERS += src/group.h +noinst_HEADERS += src/group_impl.h +noinst_HEADERS += src/num_gmp.h +noinst_HEADERS += src/num_gmp_impl.h +noinst_HEADERS += src/ecdsa.h +noinst_HEADERS += src/ecdsa_impl.h +noinst_HEADERS += src/eckey.h +noinst_HEADERS += src/eckey_impl.h +noinst_HEADERS += src/ecmult.h +noinst_HEADERS += src/ecmult_impl.h +noinst_HEADERS += src/ecmult_const.h +noinst_HEADERS += src/ecmult_const_impl.h +noinst_HEADERS += src/ecmult_gen.h +noinst_HEADERS += src/ecmult_gen_impl.h +noinst_HEADERS += src/num.h +noinst_HEADERS += src/num_impl.h +noinst_HEADERS += src/field_10x26.h +noinst_HEADERS += src/field_10x26_impl.h +noinst_HEADERS += src/field_5x52.h +noinst_HEADERS += src/field_5x52_impl.h +noinst_HEADERS += src/field_5x52_int128_impl.h +noinst_HEADERS += src/field_5x52_asm_impl.h +noinst_HEADERS += src/java/org_bitcoin_NativeSecp256k1.h +noinst_HEADERS += src/util.h +noinst_HEADERS += src/testrand.h +noinst_HEADERS += src/testrand_impl.h +noinst_HEADERS += src/hash.h +noinst_HEADERS += src/hash_impl.h +noinst_HEADERS += src/field.h +noinst_HEADERS += src/field_impl.h +noinst_HEADERS += src/bench.h + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = libsecp256k1.pc + +libsecp256k1_la_SOURCES = src/secp256k1.c +libsecp256k1_la_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src $(SECP_INCLUDES) +libsecp256k1_la_LIBADD = $(SECP_LIBS) + + +noinst_PROGRAMS = +if USE_BENCHMARK +noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_internal +bench_verify_SOURCES = src/bench_verify.c +bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS) +bench_verify_LDFLAGS = -static +bench_recover_SOURCES = src/bench_recover.c +bench_recover_LDADD = libsecp256k1.la $(SECP_LIBS) +bench_recover_LDFLAGS = -static +bench_sign_SOURCES = src/bench_sign.c +bench_sign_LDADD = libsecp256k1.la $(SECP_LIBS) +bench_sign_LDFLAGS = -static +bench_internal_SOURCES = src/bench_internal.c +bench_internal_LDADD = $(SECP_LIBS) +bench_internal_LDFLAGS = -static +bench_internal_CPPFLAGS = $(SECP_INCLUDES) +endif + +if USE_TESTS +noinst_PROGRAMS += tests +tests_SOURCES = src/tests.c +tests_CPPFLAGS = -DVERIFY -I$(top_srcdir)/src $(SECP_INCLUDES) $(SECP_TEST_INCLUDES) +tests_LDADD = $(SECP_LIBS) $(SECP_TEST_LIBS) +tests_LDFLAGS = -static +TESTS = tests +endif + +if USE_ECMULT_STATIC_PRECOMPUTATION +CPPFLAGS_FOR_BUILD +=-I$(top_srcdir)/ +CFLAGS_FOR_BUILD += -Wall -Wextra -Wno-unused-function + +gen_context_OBJECTS = gen_context.o +gen_context_BIN = gen_context$(BUILD_EXEEXT) +gen_%.o: src/gen_%.c + $(CC_FOR_BUILD) $(CPPFLAGS_FOR_BUILD) $(CFLAGS_FOR_BUILD) -c $< -o $@ + +$(gen_context_BIN): $(gen_context_OBJECTS) + $(CC_FOR_BUILD) $^ -o $@ + +$(libsecp256k1_la_OBJECTS): src/ecmult_static_context.h +$(tests_OBJECTS): src/ecmult_static_context.h + +src/ecmult_static_context.h: $(gen_context_BIN) + ./$(gen_context_BIN) + +CLEANFILES = $(gen_context_BIN) src/ecmult_static_context.h +endif + +EXTRA_DIST = autogen.sh src/gen_context.c src/basic-config.h + +if ENABLE_MODULE_ECDH +include 
src/modules/ecdh/Makefile.am.include +endif + +if ENABLE_MODULE_SCHNORR +include src/modules/schnorr/Makefile.am.include +endif diff --git a/bf/secp256k1/README.md b/bf/secp256k1/README.md new file mode 100644 index 0000000..6095db4 --- /dev/null +++ b/bf/secp256k1/README.md @@ -0,0 +1,61 @@ +libsecp256k1 +============ + +[![Build Status](https://travis-ci.org/bitcoin/secp256k1.svg?branch=master)](https://travis-ci.org/bitcoin/secp256k1) + +Optimized C library for EC operations on curve secp256k1. + +This library is a work in progress and is being used to research best practices. Use at your own risk. + +Features: +* secp256k1 ECDSA signing/verification and key generation. +* Adding/multiplying private/public keys. +* Serialization/parsing of private keys, public keys, signatures. +* Constant time, constant memory access signing and pubkey generation. +* Derandomized DSA (via RFC6979 or with a caller provided function.) +* Very efficient implementation. + +Implementation details +---------------------- + +* General + * No runtime heap allocation. + * Extensive testing infrastructure. + * Structured to facilitate review and analysis. + * Intended to be portable to any system with a C89 compiler and uint64_t support. + * Expose only higher level interfaces to minimize the API surface and improve application security. ("Be difficult to use insecurely.") +* Field operations + * Optimized implementation of arithmetic modulo the curve's field size (2^256 - 0x1000003D1). + * Using 5 52-bit limbs (including hand-optimized assembly for x86_64, by Diederik Huys). + * Using 10 26-bit limbs. + * Field inverses and square roots using a sliding window over blocks of 1s (by Peter Dettman). +* Scalar operations + * Optimized implementation without data-dependent branches of arithmetic modulo the curve's order. + * Using 4 64-bit limbs (relying on __int128 support in the compiler). + * Using 8 32-bit limbs. +* Group operations + * Point addition formula specifically simplified for the curve equation (y^2 = x^3 + 7). + * Use addition between points in Jacobian and affine coordinates where possible. + * Use a unified addition/doubling formula where necessary to avoid data-dependent branches. + * Point/x comparison without a field inversion by comparison in the Jacobian coordinate space. +* Point multiplication for verification (a*P + b*G). + * Use wNAF notation for point multiplicands. + * Use a much larger window for multiples of G, using precomputed multiples. + * Use Shamir's trick to do the multiplication with the public key and the generator simultaneously. + * Optionally (off by default) use secp256k1's efficiently-computable endomorphism to split the P multiplicand into 2 half-sized ones. +* Point multiplication for signing + * Use a precomputed table of multiples of powers of 16 multiplied with the generator, so general multiplication becomes a series of additions. + * Access the table with branch-free conditional moves so memory access is uniform. + * No data-dependent branches + * The precomputed tables add and eventually subtract points for which no known scalar (private key) is known, preventing even an attacker with control over the private key used to control the data internally. 
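+
+A minimal sketch of the sign/verify flow described above (illustrative only,
+not shipped with the library): msg32 and seckey stand in for a caller-supplied
+32-byte message hash and secret key, and error handling is reduced to
+return-value checks.
+
+    #include <stdio.h>
+    #include "secp256k1.h"
+
+    int main(void) {
+        unsigned char msg32[32] = {0x01};  /* placeholder message hash */
+        unsigned char seckey[32] = {0x02}; /* placeholder secret key */
+        secp256k1_pubkey_t pubkey;
+        secp256k1_ecdsa_signature_t sig;
+        secp256k1_context_t *ctx =
+            secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
+
+        if (!secp256k1_ec_pubkey_create(ctx, &pubkey, seckey))
+            return 1; /* secret key was invalid */
+        if (!secp256k1_ecdsa_sign(ctx, msg32, &sig, seckey, NULL, NULL))
+            return 1; /* nonce generation failed */
+        printf("verify: %d\n", secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkey));
+
+        secp256k1_context_destroy(ctx);
+        return 0;
+    }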
+ +Build steps +----------- + +libsecp256k1 is built using autotools: + + $ ./autogen.sh + $ ./configure + $ make + $ ./tests + $ sudo make install # optional diff --git a/bf/secp256k1/TODO b/bf/secp256k1/TODO new file mode 100644 index 0000000..a300e1c --- /dev/null +++ b/bf/secp256k1/TODO @@ -0,0 +1,3 @@ +* Unit tests for fieldelem/groupelem, including ones intended to + trigger fieldelem's boundary cases. +* Complete constant-time operations for signing/keygen diff --git a/bf/secp256k1/autogen.sh b/bf/secp256k1/autogen.sh new file mode 100755 index 0000000..65286b9 --- /dev/null +++ b/bf/secp256k1/autogen.sh @@ -0,0 +1,3 @@ +#!/bin/sh +set -e +autoreconf -if --warnings=all diff --git a/bf/secp256k1/configure.ac b/bf/secp256k1/configure.ac new file mode 100644 index 0000000..0dc83ee --- /dev/null +++ b/bf/secp256k1/configure.ac @@ -0,0 +1,366 @@ +AC_PREREQ([2.60]) +AC_INIT([libsecp256k1],[0.1]) +AC_CONFIG_AUX_DIR([build-aux]) +AC_CONFIG_MACRO_DIR([build-aux/m4]) +AC_CANONICAL_HOST +AH_TOP([#ifndef LIBSECP256K1_CONFIG_H]) +AH_TOP([#define LIBSECP256K1_CONFIG_H]) +AH_BOTTOM([#endif /*LIBSECP256K1_CONFIG_H*/]) +AM_INIT_AUTOMAKE([foreign subdir-objects]) +LT_INIT + +dnl make the compilation flags quiet unless V=1 is used +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) + +PKG_PROG_PKG_CONFIG + +AC_PATH_TOOL(AR, ar) +AC_PATH_TOOL(RANLIB, ranlib) +AC_PATH_TOOL(STRIP, strip) +AX_PROG_CC_FOR_BUILD + +if test "x$CFLAGS" = "x"; then + CFLAGS="-O3 -g" +fi + +AM_PROG_CC_C_O + +AC_PROG_CC_C89 +if test x"$ac_cv_prog_cc_c89" = x"no"; then + AC_MSG_ERROR([c89 compiler support required]) +fi + +case $host in + *mingw*) + use_pkgconfig=no + ;; + *) + use_pkgconfig=yes + ;; +esac + +case $host_os in + *darwin*) + if test x$cross_compiling != xyes; then + AC_PATH_PROG([BREW],brew,) + if test x$BREW != x; then + dnl These Homebrew packages may be keg-only, meaning that they won't be found + dnl in expected paths because they may conflict with system files. Ask + dnl Homebrew where each one is located, then adjust paths accordingly. + + openssl_prefix=`$BREW --prefix openssl 2>/dev/null` + gmp_prefix=`$BREW --prefix gmp 2>/dev/null` + if test x$openssl_prefix != x; then + PKG_CONFIG_PATH="$openssl_prefix/lib/pkgconfig:$PKG_CONFIG_PATH" + export PKG_CONFIG_PATH + fi + if test x$gmp_prefix != x; then + GMP_CPPFLAGS="-I$gmp_prefix/include" + GMP_LIBS="-L$gmp_prefix/lib" + fi + else + AC_PATH_PROG([PORT],port,) + dnl if homebrew isn't installed and macports is, add the macports default paths + dnl as a last resort. 
+ if test x$PORT != x; then + CPPFLAGS="$CPPFLAGS -isystem /opt/local/include" + LDFLAGS="$LDFLAGS -L/opt/local/lib" + fi + fi + fi + ;; +esac + +CFLAGS="$CFLAGS -W" + +warn_CFLAGS="-std=c89 -pedantic -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes -Wno-unused-function -Wno-long-long -Wno-overlength-strings" +saved_CFLAGS="$CFLAGS" +CFLAGS="$CFLAGS $warn_CFLAGS" +AC_MSG_CHECKING([if ${CC} supports ${warn_CFLAGS}]) +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[char foo;]])], + [ AC_MSG_RESULT([yes]) ], + [ AC_MSG_RESULT([no]) + CFLAGS="$saved_CFLAGS" + ]) + + +AC_ARG_ENABLE(benchmark, + AS_HELP_STRING([--enable-benchmark],[compile benchmark (default is no)]), + [use_benchmark=$enableval], + [use_benchmark=no]) + +AC_ARG_ENABLE(tests, + AS_HELP_STRING([--enable-tests],[compile tests (default is yes)]), + [use_tests=$enableval], + [use_tests=yes]) + +AC_ARG_ENABLE(endomorphism, + AS_HELP_STRING([--enable-endomorphism],[enable endomorphism (default is no)]), + [use_endomorphism=$enableval], + [use_endomorphism=no]) + +AC_ARG_ENABLE(ecmult_static_precomputation, + AS_HELP_STRING([--enable-ecmult-static-precomputation],[enable precomputed ecmult table for signing (default is yes)]), + [use_ecmult_static_precomputation=$enableval], + [use_ecmult_static_precomputation=yes]) + +AC_ARG_ENABLE(module_ecdh, + AS_HELP_STRING([--enable-module-ecdh],[enable ECDH shared secret computation (default is no)]), + [enable_module_ecdh=$enableval], + [enable_module_ecdh=no]) + +AC_ARG_ENABLE(module_schnorr, + AS_HELP_STRING([--enable-module-schnorr],[enable Schnorr signature module (default is no)]), + [enable_module_schnorr=$enableval], + [enable_module_schnorr=no]) + +AC_ARG_WITH([field], [AS_HELP_STRING([--with-field=64bit|32bit|auto], +[Specify Field Implementation. Default is auto])],[req_field=$withval], [req_field=auto]) + +AC_ARG_WITH([bignum], [AS_HELP_STRING([--with-bignum=gmp|no|auto], +[Specify Bignum Implementation. Default is auto])],[req_bignum=$withval], [req_bignum=auto]) + +AC_ARG_WITH([scalar], [AS_HELP_STRING([--with-scalar=64bit|32bit|auto], +[Specify scalar implementation. Default is auto])],[req_scalar=$withval], [req_scalar=auto]) + +AC_ARG_WITH([asm], [AS_HELP_STRING([--with-asm=x86_64|no|auto] +[Specify assembly optimizations to use. 
Default is auto])],[req_asm=$withval], [req_asm=auto]) + +AC_CHECK_TYPES([__int128]) + +AC_MSG_CHECKING([for __builtin_expect]) +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[void myfunc() {__builtin_expect(0,0);}]])], + [ AC_MSG_RESULT([yes]);AC_DEFINE(HAVE_BUILTIN_EXPECT,1,[Define this symbol if __builtin_expect is available]) ], + [ AC_MSG_RESULT([no]) + ]) + +if test x"$req_asm" = x"auto"; then + SECP_64BIT_ASM_CHECK + if test x"$has_64bit_asm" = x"yes"; then + set_asm=x86_64 + fi + if test x"$set_asm" = x; then + set_asm=no + fi +else + set_asm=$req_asm + case $set_asm in + x86_64) + SECP_64BIT_ASM_CHECK + if test x"$has_64bit_asm" != x"yes"; then + AC_MSG_ERROR([x86_64 assembly optimization requested but not available]) + fi + ;; + no) + ;; + *) + AC_MSG_ERROR([invalid assembly optimization selection]) + ;; + esac +fi + +if test x"$req_field" = x"auto"; then + if test x"set_asm" = x"x86_64"; then + set_field=64bit + fi + if test x"$set_field" = x; then + SECP_INT128_CHECK + if test x"$has_int128" = x"yes"; then + set_field=64bit + fi + fi + if test x"$set_field" = x; then + set_field=32bit + fi +else + set_field=$req_field + case $set_field in + 64bit) + if test x"$set_asm" != x"x86_64"; then + SECP_INT128_CHECK + if test x"$has_int128" != x"yes"; then + AC_MSG_ERROR([64bit field explicitly requested but neither __int128 support or x86_64 assembly available]) + fi + fi + ;; + 32bit) + ;; + *) + AC_MSG_ERROR([invalid field implementation selection]) + ;; + esac +fi + +if test x"$req_scalar" = x"auto"; then + SECP_INT128_CHECK + if test x"$has_int128" = x"yes"; then + set_scalar=64bit + fi + if test x"$set_scalar" = x; then + set_scalar=32bit + fi +else + set_scalar=$req_scalar + case $set_scalar in + 64bit) + SECP_INT128_CHECK + if test x"$has_int128" != x"yes"; then + AC_MSG_ERROR([64bit scalar explicitly requested but __int128 support not available]) + fi + ;; + 32bit) + ;; + *) + AC_MSG_ERROR([invalid scalar implementation selected]) + ;; + esac +fi + +if test x"$req_bignum" = x"auto"; then + SECP_GMP_CHECK + if test x"$has_gmp" = x"yes"; then + set_bignum=gmp + fi + + if test x"$set_bignum" = x; then + set_bignum=no + fi +else + set_bignum=$req_bignum + case $set_bignum in + gmp) + SECP_GMP_CHECK + if test x"$has_gmp" != x"yes"; then + AC_MSG_ERROR([gmp bignum explicitly requested but libgmp not available]) + fi + ;; + no) + ;; + *) + AC_MSG_ERROR([invalid bignum implementation selection]) + ;; + esac +fi + +# select assembly optimization +case $set_asm in +x86_64) + AC_DEFINE(USE_ASM_X86_64, 1, [Define this symbol to enable x86_64 assembly optimizations]) + ;; +no) + ;; +*) + AC_MSG_ERROR([invalid assembly optimizations]) + ;; +esac + +# select field implementation +case $set_field in +64bit) + AC_DEFINE(USE_FIELD_5X52, 1, [Define this symbol to use the FIELD_5X52 implementation]) + ;; +32bit) + AC_DEFINE(USE_FIELD_10X26, 1, [Define this symbol to use the FIELD_10X26 implementation]) + ;; +*) + AC_MSG_ERROR([invalid field implementation]) + ;; +esac + +# select bignum implementation +case $set_bignum in +gmp) + AC_DEFINE(HAVE_LIBGMP, 1, [Define this symbol if libgmp is installed]) + AC_DEFINE(USE_NUM_GMP, 1, [Define this symbol to use the gmp implementation for num]) + AC_DEFINE(USE_FIELD_INV_NUM, 1, [Define this symbol to use the num-based field inverse implementation]) + AC_DEFINE(USE_SCALAR_INV_NUM, 1, [Define this symbol to use the num-based scalar inverse implementation]) + ;; +no) + AC_DEFINE(USE_NUM_NONE, 1, [Define this symbol to use no num implementation]) + 
AC_DEFINE(USE_FIELD_INV_BUILTIN, 1, [Define this symbol to use the native field inverse implementation]) + AC_DEFINE(USE_SCALAR_INV_BUILTIN, 1, [Define this symbol to use the native scalar inverse implementation]) + ;; +*) + AC_MSG_ERROR([invalid bignum implementation]) + ;; +esac + +#select scalar implementation +case $set_scalar in +64bit) + AC_DEFINE(USE_SCALAR_4X64, 1, [Define this symbol to use the 4x64 scalar implementation]) + ;; +32bit) + AC_DEFINE(USE_SCALAR_8X32, 1, [Define this symbol to use the 8x32 scalar implementation]) + ;; +*) + AC_MSG_ERROR([invalid scalar implementation]) + ;; +esac + +if test x"$use_tests" = x"yes"; then + SECP_OPENSSL_CHECK + if test x"$has_openssl_ec" = x"yes"; then + AC_DEFINE(ENABLE_OPENSSL_TESTS, 1, [Define this symbol if OpenSSL EC functions are available]) + SECP_TEST_INCLUDES="$SSL_CFLAGS $CRYPTO_CFLAGS" + SECP_TEST_LIBS="$CRYPTO_LIBS" + + case $host in + *mingw*) + SECP_TEST_LIBS="$SECP_TEST_LIBS -lgdi32" + ;; + esac + + fi +fi + +if test x"$set_bignum" = x"gmp"; then + SECP_LIBS="$SECP_LIBS $GMP_LIBS" + SECP_INCLUDES="$SECP_INCLUDES $GMP_CPPFLAGS" +fi + +if test x"$use_endomorphism" = x"yes"; then + AC_DEFINE(USE_ENDOMORPHISM, 1, [Define this symbol to use endomorphism optimization]) +fi + +if test x"$use_ecmult_static_precomputation" = x"yes"; then + AC_DEFINE(USE_ECMULT_STATIC_PRECOMPUTATION, 1, [Define this symbol to use a statically generated ecmult table]) +fi + +if test x"$enable_module_ecdh" = x"yes"; then + AC_DEFINE(ENABLE_MODULE_ECDH, 1, [Define this symbol to enable the ECDH module]) +fi + +if test x"$enable_module_schnorr" = x"yes"; then + AC_DEFINE(ENABLE_MODULE_SCHNORR, 1, [Define this symbol to enable the Schnorr signature module]) +fi + +AC_C_BIGENDIAN() + +AC_MSG_NOTICE([Using assembly optimizations: $set_asm]) +AC_MSG_NOTICE([Using field implementation: $set_field]) +AC_MSG_NOTICE([Using bignum implementation: $set_bignum]) +AC_MSG_NOTICE([Using scalar implementation: $set_scalar]) +AC_MSG_NOTICE([Using endomorphism optimizations: $use_endomorphism]) +AC_MSG_NOTICE([Building ECDH module: $enable_module_ecdh]) + +AC_MSG_NOTICE([Building Schnorr signatures module: $enable_module_schnorr]) + +AC_CONFIG_HEADERS([src/libsecp256k1-config.h]) +AC_CONFIG_FILES([Makefile libsecp256k1.pc]) +AC_SUBST(SECP_INCLUDES) +AC_SUBST(SECP_LIBS) +AC_SUBST(SECP_TEST_LIBS) +AC_SUBST(SECP_TEST_INCLUDES) +AM_CONDITIONAL([USE_TESTS], [test x"$use_tests" != x"no"]) +AM_CONDITIONAL([USE_BENCHMARK], [test x"$use_benchmark" = x"yes"]) +AM_CONDITIONAL([USE_ECMULT_STATIC_PRECOMPUTATION], [test x"$use_ecmult_static_precomputation" = x"yes"]) +AM_CONDITIONAL([ENABLE_MODULE_ECDH], [test x"$enable_module_ecdh" = x"yes"]) +AM_CONDITIONAL([ENABLE_MODULE_SCHNORR], [test x"$enable_module_schnorr" = x"yes"]) + +dnl make sure nothing new is exported so that we don't break the cache +PKGCONFIG_PATH_TEMP="$PKG_CONFIG_PATH" +unset PKG_CONFIG_PATH +PKG_CONFIG_PATH="$PKGCONFIG_PATH_TEMP" + +AC_OUTPUT diff --git a/bf/secp256k1/include/secp256k1.h b/bf/secp256k1/include/secp256k1.h new file mode 100644 index 0000000..1b8361e --- /dev/null +++ b/bf/secp256k1/include/secp256k1.h @@ -0,0 +1,453 @@ +#ifndef _SECP256K1_ +# define _SECP256K1_ + +# ifdef __cplusplus +extern "C" { +# endif + +# if !defined(SECP256K1_GNUC_PREREQ) +# if defined(__GNUC__)&&defined(__GNUC_MINOR__) +# define SECP256K1_GNUC_PREREQ(_maj,_min) \ + ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) +# else +# define SECP256K1_GNUC_PREREQ(_maj,_min) 0 +# endif +# endif + +# if (!defined(__STDC_VERSION__) 
|| (__STDC_VERSION__ < 199901L) ) +# if SECP256K1_GNUC_PREREQ(2,7) +# define SECP256K1_INLINE __inline__ +# elif (defined(_MSC_VER)) +# define SECP256K1_INLINE __inline +# else +# define SECP256K1_INLINE +# endif +# else +# define SECP256K1_INLINE inline +# endif + +/**Warning attributes + * NONNULL is not used if SECP256K1_BUILD is set to avoid the compiler optimizing out + * some paranoid null checks. */ +# if defined(__GNUC__) && SECP256K1_GNUC_PREREQ(3, 4) +# define SECP256K1_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__)) +# else +# define SECP256K1_WARN_UNUSED_RESULT +# endif +# if !defined(SECP256K1_BUILD) && defined(__GNUC__) && SECP256K1_GNUC_PREREQ(3, 4) +# define SECP256K1_ARG_NONNULL(_x) __attribute__ ((__nonnull__(_x))) +# else +# define SECP256K1_ARG_NONNULL(_x) +# endif + +/** Opaque data structure that holds context information (precomputed tables etc.). + * Only functions that take a pointer to a non-const context require exclusive + * access to it. Multiple functions that take a pointer to a const context may + * run simultaneously. + */ +typedef struct secp256k1_context_struct secp256k1_context_t; + +/** Flags to pass to secp256k1_context_create. */ +# define SECP256K1_CONTEXT_VERIFY (1 << 0) +# define SECP256K1_CONTEXT_SIGN (1 << 1) + +/** Create a secp256k1 context object. + * Returns: a newly created context object. + * In: flags: which parts of the context to initialize. + */ +secp256k1_context_t* secp256k1_context_create( + int flags +) SECP256K1_WARN_UNUSED_RESULT; + +/** Copies a secp256k1 context object. + * Returns: a newly created context object. + * In: ctx: an existing context to copy + */ +secp256k1_context_t* secp256k1_context_clone( + const secp256k1_context_t* ctx +) SECP256K1_WARN_UNUSED_RESULT; + +/** Destroy a secp256k1 context object. + * The context pointer may not be used afterwards. + */ +void secp256k1_context_destroy( + secp256k1_context_t* ctx +) SECP256K1_ARG_NONNULL(1); + +/** Set a callback function to be called when an illegal argument is passed to + * an API call. The philosophy is that these shouldn't be dealt with through a + * specific return value, as calling code should not have branches to deal with + * the case that this code itself is broken. + * On the other hand, during debug stage, one would want to be informed about + * such mistakes, and the default (crashing) may be inadvisable. + * When this callback is triggered, the API function called is guaranteed not + * to cause a crash, though its return value and output arguments are + * undefined. + */ +void secp256k1_context_set_illegal_callback( + secp256k1_context_t* ctx, + void (*fun)(const char* message, void* data), + void* data +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2); + +/** Set a callback function to be called when an internal consistency check + * fails. The default is crashing. + * This can only trigger in case of a hardware failure, miscompilation, + * memory corruption, serious bug in the library, or other error would can + * otherwise result in undefined behaviour. It will not trigger due to mere + * incorrect usage of the API (see secp256k1_context_set_illegal_callback + * for that). After this callback returns, anything may happen, including + * crashing. + */ +void secp256k1_context_set_error_callback( + secp256k1_context_t* ctx, + void (*fun)(const char* message, void* data), + void* data +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2); + +/** Data type to hold a parsed and valid public key. 
+ This data type should be considered opaque to the user, and only created + through API functions. It is not guaranteed to be compatible between + different implementations. If you need to convert to a format suitable + for storage or transmission, use secp256k1_ec_pubkey_serialize and + secp256k1_ec_pubkey_parse. + */ +typedef struct { + unsigned char data[64]; +} secp256k1_pubkey_t; + +/** Parse a variable-length public key into the pubkey object. + * Returns: 1 if the public key was fully valid. + * 0 if the public key could not be parsed or is invalid. + * In: ctx: a secp256k1 context object. + * input: pointer to a serialized public key + * inputlen: length of the array pointed to by input + * Out: pubkey: pointer to a pubkey object. If 1 is returned, it is set to a + * parsed version of input. If not, its value is undefined. + * This function supports parsing compressed (33 bytes, header byte 0x02 or + * 0x03), uncompressed (65 bytes, header byte 0x04), or hybrid (65 bytes, header + * byte 0x06 or 0x07) format public keys. + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_parse( + const secp256k1_context_t* ctx, + secp256k1_pubkey_t* pubkey, + const unsigned char *input, + int inputlen +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + +/** Serialize a pubkey object into a serialized byte sequence. + * Returns: 1 always. + * In: ctx: a secp256k1 context object. + * pubkey: a pointer to a secp256k1_pubkey_t containing an initialized + * public key. + * compressed: whether to serialize in compressed format. + * Out: output: a pointer to a 65-byte (if compressed==0) or 33-byte (if + * compressed==1) byte array to place the serialized key in. + * outputlen: a pointer to an integer which will contain the serialized + * size. + */ +int secp256k1_ec_pubkey_serialize( + const secp256k1_context_t* ctx, + unsigned char *output, + int *outputlen, + const secp256k1_pubkey_t* pubkey, + int compressed +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +/** Data type to hold a parsed ECDSA signature, optionally supporting pubkey + * recovery. + This data type should be considered opaque to the user, and only created + through API functions. It is not guaranteed to be compatible between + different implementations. If you need to convert to a format suitable + for storage or transmission, use secp256k1_ecdsa_signature_serialize_* and + secp256k1_ecdsa_signature_parse_* functions. */ +typedef struct { + unsigned char data[65]; +} secp256k1_ecdsa_signature_t; + +/** Parse a DER ECDSA signature. + * Returns: 1 when the signature could be parsed, 0 otherwise. + * In: ctx: a secp256k1 context object + * input: a pointer to the signature to be parsed + * inputlen: the length of the array pointed to be input + * Out: sig: a pointer to a signature object + * + * Note that this function also supports some violations of DER. + * + * The resulting signature object will not support pubkey recovery. + */ +int secp256k1_ecdsa_signature_parse_der( + const secp256k1_context_t* ctx, + secp256k1_ecdsa_signature_t* sig, + const unsigned char *input, + int inputlen +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + +/** Parse a compact ECDSA signature (64 bytes + recovery id). 
+ * Returns: 1 when the signature could be parsed, 0 otherwise + * In: ctx: a secp256k1 context object + * input64: a pointer to a 64-byte compact signature + * recid: the recovery id (0, 1, 2 or 3, or -1 for unknown) + * Out: sig: a pointer to a signature object + * + * If recid is not -1, the resulting signature object will support pubkey + * recovery. + */ +int secp256k1_ecdsa_signature_parse_compact( + const secp256k1_context_t* ctx, + secp256k1_ecdsa_signature_t* sig, + const unsigned char *input64, + int recid +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + +/** Serialize an ECDSA signature in DER format. + * Returns: 1 if enough space was available to serialize, 0 otherwise + * In: ctx: a secp256k1 context object + * sig: a pointer to an initialized signature object + * Out: output: a pointer to an array to store the DER serialization + * In/Out: outputlen: a pointer to a length integer. Initially, this integer + * should be set to the length of output. After the call + * it will be set to the length of the serialization (even + * if 0 was returned). + */ +int secp256k1_ecdsa_signature_serialize_der( + const secp256k1_context_t* ctx, + unsigned char *output, + int *outputlen, + const secp256k1_ecdsa_signature_t* sig +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +/** Serialize an ECDSA signature in compact format (64 bytes + recovery id). + * Returns: 1 + * In: ctx: a secp256k1 context object + * sig: a pointer to an initialized signature object (cannot be NULL) + * Out: output64: a pointer to a 64-byte array of the compact signature (cannot be NULL) + * recid: a pointer to an integer to hold the recovery id (can be NULL). + * + * If recid is not NULL, the signature must support pubkey recovery. + */ +int secp256k1_ecdsa_signature_serialize_compact( + const secp256k1_context_t* ctx, + unsigned char *output64, + int *recid, + const secp256k1_ecdsa_signature_t* sig +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4); + +/** Verify an ECDSA signature. + * Returns: 1: correct signature + * 0: incorrect or unparseable signature + * In: ctx: a secp256k1 context object, initialized for verification. + * msg32: the 32-byte message hash being verified (cannot be NULL) + * sig: the signature being verified (cannot be NULL) + * pubkey: pointer to an initialized public key to verify with (cannot be NULL) + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify( + const secp256k1_context_t* ctx, + const unsigned char *msg32, + const secp256k1_ecdsa_signature_t *sig, + const secp256k1_pubkey_t *pubkey +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +/** A pointer to a function to deterministically generate a nonce. + * Returns: 1 if a nonce was successfully generated. 0 will cause signing to fail. + * In: msg32: the 32-byte message hash being verified (will not be NULL) + * key32: pointer to a 32-byte secret key (will not be NULL) + * algo16: pointer to a 16-byte array describing the signature + * algorithm (will be NULL for ECDSA for compatibility). + * attempt: how many iterations we have tried to find a nonce. + * This will almost always be 0, but different attempt values + * are required to result in a different nonce. + * data: Arbitrary data pointer that is passed through. + * Out: nonce32: pointer to a 32-byte array to be filled by the function. 
+ * Except for test cases, this function should compute some cryptographic hash of + * the message, the key and the attempt. + */ +typedef int (*secp256k1_nonce_function_t)( + unsigned char *nonce32, + const unsigned char *msg32, + const unsigned char *key32, + const unsigned char *algo16, + unsigned int attempt, + const void *data +); + +/** An implementation of RFC6979 (using HMAC-SHA256) as nonce generation function. + * If a data pointer is passed, it is assumed to be a pointer to 32 bytes of + * extra entropy. + */ +extern const secp256k1_nonce_function_t secp256k1_nonce_function_rfc6979; + +/** A default safe nonce generation function (currently equal to secp256k1_nonce_function_rfc6979). */ +extern const secp256k1_nonce_function_t secp256k1_nonce_function_default; + +/** Create an ECDSA signature. + * Returns: 1: signature created + * 0: the nonce generation function failed, or the private key was invalid. + * In: ctx: pointer to a context object, initialized for signing (cannot be NULL) + * msg32: the 32-byte message hash being signed (cannot be NULL) + * seckey: pointer to a 32-byte secret key (cannot be NULL) + * noncefp:pointer to a nonce generation function. If NULL, secp256k1_nonce_function_default is used + * ndata: pointer to arbitrary data used by the nonce generation function (can be NULL) + * Out: sig: pointer to an array where the signature will be placed (cannot be NULL) + * + * The resulting signature will support pubkey recovery. + * + * The sig always has an s value in the lower half of the range (From 0x1 + * to 0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF5D576E7357A4501DDFE92F46681B20A0, + * inclusive), unlike many other implementations. + * With ECDSA a third-party can can forge a second distinct signature + * of the same message given a single initial signature without knowing + * the key by setting s to its additive inverse mod-order, 'flipping' the + * sign of the random point R which is not included in the signature. + * Since the forgery is of the same message this isn't universally + * problematic, but in systems where message malleability or uniqueness + * of signatures is important this can cause issues. This forgery can be + * blocked by all verifiers forcing signers to use a canonical form. The + * lower-S form reduces the size of signatures slightly on average when + * variable length encodings (such as DER) are used and is cheap to + * verify, making it a good choice. Security of always using lower-S is + * assured because anyone can trivially modify a signature after the + * fact to enforce this property. Adjusting it inside the signing + * function avoids the need to re-serialize or have curve specific + * constants outside of the library. By always using a canonical form + * even in applications where it isn't needed it becomes possible to + * impose a requirement later if a need is discovered. + * No other forms of ECDSA malleability are known and none seem likely, + * but there is no formal proof that ECDSA, even with this additional + * restriction, is free of other malleability. Commonly used serialization + * schemes will also accept various non-unique encodings, so care should + * be taken when this property is required for an application. 
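+ *
+ * Illustrative call (a sketch only; ctx is assumed to be a context created
+ * with SECP256K1_CONTEXT_SIGN, msg32/seckey are caller-provided 32-byte
+ * arrays, and NULL, NULL selects secp256k1_nonce_function_default):
+ *   secp256k1_ecdsa_signature_t sig;
+ *   int ok = secp256k1_ecdsa_sign(ctx, msg32, &sig, seckey, NULL, NULL);
+ *   ok == 0 here means the nonce function failed or seckey was invalid.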
+ */ +int secp256k1_ecdsa_sign( + const secp256k1_context_t* ctx, + const unsigned char *msg32, + secp256k1_ecdsa_signature_t *sig, + const unsigned char *seckey, + secp256k1_nonce_function_t noncefp, + const void *ndata +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +/** Recover an ECDSA public key from a signature. + * Returns: 1: public key successfully recovered (which guarantees a correct signature). + * 0: otherwise. + * In: ctx: pointer to a context object, initialized for verification (cannot be NULL) + * msg32: the 32-byte message hash assumed to be signed (cannot be NULL) + * sig64: pointer to initialized signature that supports pubkey recovery (cannot be NULL) + * Out: pubkey: pointer to the recoved public key (cannot be NULL) + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_recover( + const secp256k1_context_t* ctx, + const unsigned char *msg32, + const secp256k1_ecdsa_signature_t *sig, + secp256k1_pubkey_t *pubkey +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +/** Verify an ECDSA secret key. + * Returns: 1: secret key is valid + * 0: secret key is invalid + * In: ctx: pointer to a context object (cannot be NULL) + * seckey: pointer to a 32-byte secret key (cannot be NULL) + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_seckey_verify( + const secp256k1_context_t* ctx, + const unsigned char *seckey +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2); + +/** Compute the public key for a secret key. + * In: ctx: pointer to a context object, initialized for signing (cannot be NULL) + * seckey: pointer to a 32-byte private key (cannot be NULL) + * Out: pubkey: pointer to the created public key (cannot be NULL) + * Returns: 1: secret was valid, public key stores + * 0: secret was invalid, try again + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_create( + const secp256k1_context_t* ctx, + secp256k1_pubkey_t *pubkey, + const unsigned char *seckey +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + +/** Export a private key in DER format. + * In: ctx: pointer to a context object, initialized for signing (cannot be NULL) + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_export( + const secp256k1_context_t* ctx, + const unsigned char *seckey, + unsigned char *privkey, + int *privkeylen, + int compressed +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +/** Import a private key in DER format. */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_import( + const secp256k1_context_t* ctx, + unsigned char *seckey, + const unsigned char *privkey, + int privkeylen +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + +/** Tweak a private key by adding tweak to it. */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_tweak_add( + const secp256k1_context_t* ctx, + unsigned char *seckey, + const unsigned char *tweak +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + +/** Tweak a public key by adding tweak times the generator to it. 
+ * In: ctx: pointer to a context object, initialized for verification (cannot be NULL) + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_tweak_add( + const secp256k1_context_t* ctx, + secp256k1_pubkey_t *pubkey, + const unsigned char *tweak +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + +/** Tweak a private key by multiplying it with tweak. */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_tweak_mul( + const secp256k1_context_t* ctx, + unsigned char *seckey, + const unsigned char *tweak +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + +/** Tweak a public key by multiplying it with tweak. + * In: ctx: pointer to a context object, initialized for verification (cannot be NULL) + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_tweak_mul( + const secp256k1_context_t* ctx, + secp256k1_pubkey_t *pubkey, + const unsigned char *tweak +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3); + +/** Updates the context randomization. + * Returns: 1: randomization successfully updated + * 0: error + * In: ctx: pointer to a context object (cannot be NULL) + * seed32: pointer to a 32-byte random seed (NULL resets to initial state) + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_context_randomize( + secp256k1_context_t* ctx, + const unsigned char *seed32 +) SECP256K1_ARG_NONNULL(1); + +/** Add a number of public keys together. + * Returns: 1: the sum of the public keys is valid. + * 0: the sum of the public keys is not valid. + * In: ctx: pointer to a context object + * out: pointer to pubkey for placing the resulting public key + * (cannot be NULL) + * n: the number of public keys to add together (must be at least 1) + * ins: pointer to array of pointers to public keys (cannot be NULL) + * Use secp256k1_ec_pubkey_compress and secp256k1_ec_pubkey_decompress if the + * uncompressed format is needed. 
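+ *
+ * Sketch of combining two keys (pk1 and pk2 are assumed to be initialized
+ * secp256k1_pubkey_t values and ctx a valid context object):
+ *   const secp256k1_pubkey_t *ins[2] = { &pk1, &pk2 };
+ *   secp256k1_pubkey_t sum;
+ *   int valid = secp256k1_ec_pubkey_combine(ctx, &sum, 2, ins);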
+ */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_combine( + const secp256k1_context_t* ctx, + secp256k1_pubkey_t *out, + int n, + const secp256k1_pubkey_t * const * ins +) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4); + +# ifdef __cplusplus +} +# endif + +#endif diff --git a/bf/secp256k1/include/secp256k1_ecdh.h b/bf/secp256k1/include/secp256k1_ecdh.h new file mode 100644 index 0000000..671c393 --- /dev/null +++ b/bf/secp256k1/include/secp256k1_ecdh.h @@ -0,0 +1,30 @@ +#ifndef _SECP256K1_ECDH_ +# define _SECP256K1_ECDH_ + +# include "secp256k1.h" + +# ifdef __cplusplus +extern "C" { +# endif + +/** Compute an EC Diffie-Hellman secret in constant time + * Returns: 1: exponentiation was successful + * 0: scalar was invalid (zero or overflow) + * In: ctx: pointer to a context object (cannot be NULL) + * point: pointer to a public point + * scalar: a 32-byte scalar with which to multiply the point + * Out: result: a 32-byte array which will be populated by an ECDH + * secret computed from the point and scalar + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdh( + const secp256k1_context_t* ctx, + unsigned char *result, + const secp256k1_pubkey_t *point, + const unsigned char *scalar +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +# ifdef __cplusplus +} +# endif + +#endif diff --git a/bf/secp256k1/include/secp256k1_schnorr.h b/bf/secp256k1/include/secp256k1_schnorr.h new file mode 100644 index 0000000..ff7b977 --- /dev/null +++ b/bf/secp256k1/include/secp256k1_schnorr.h @@ -0,0 +1,173 @@ +#ifndef _SECP256K1_SCHNORR_ +# define _SECP256K1_SCHNORR_ + +# include "secp256k1.h" + +# ifdef __cplusplus +extern "C" { +# endif + +/** Create a signature using a custom EC-Schnorr-SHA256 construction. It + * produces non-malleable 64-byte signatures which support public key recovery + * batch validation, and multiparty signing. + * Returns: 1: signature created + * 0: the nonce generation function failed, or the private key was + * invalid. + * In: ctx: pointer to a context object, initialized for signing + * (cannot be NULL) + * msg32: the 32-byte message hash being signed (cannot be NULL) + * seckey: pointer to a 32-byte secret key (cannot be NULL) + * noncefp:pointer to a nonce generation function. If NULL, + * secp256k1_nonce_function_default is used + * ndata: pointer to arbitrary data used by the nonce generation + * function (can be NULL) + * Out: sig64: pointer to a 64-byte array where the signature will be + * placed (cannot be NULL) + */ +int secp256k1_schnorr_sign( + const secp256k1_context_t* ctx, + const unsigned char *msg32, + unsigned char *sig64, + const unsigned char *seckey, + secp256k1_nonce_function_t noncefp, + const void *ndata +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +/** Verify a signature created by secp256k1_schnorr_sign. + * Returns: 1: correct signature + * 0: incorrect signature + * In: ctx: a secp256k1 context object, initialized for verification. 
+ * msg32: the 32-byte message hash being verified (cannot be NULL) + * sig64: the 64-byte signature being verified (cannot be NULL) + * pubkey: the public key to verify with (cannot be NULL) + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_schnorr_verify( + const secp256k1_context_t* ctx, + const unsigned char *msg32, + const unsigned char *sig64, + const secp256k1_pubkey_t *pubkey +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +/** Recover an EC public key from a Schnorr signature created using + * secp256k1_schnorr_sign. + * Returns: 1: public key successfully recovered (which guarantees a correct + * signature). + * 0: otherwise. + * In: ctx: pointer to a context object, initialized for + * verification (cannot be NULL) + * msg32: the 32-byte message hash assumed to be signed (cannot + * be NULL) + * sig64: signature as 64 byte array (cannot be NULL) + * Out: pubkey: pointer to a pubkey to set to the recovered public key + * (cannot be NULL). + */ +int secp256k1_schnorr_recover( + const secp256k1_context_t* ctx, + const unsigned char *msg32, + const unsigned char *sig64, + secp256k1_pubkey_t *pubkey +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); + +/** Generate a nonce pair deterministically for use with + * secp256k1_schnorr_partial_sign. + * Returns: 1: valid nonce pair was generated. + * 0: otherwise (nonce generation function failed) + * In: ctx: pointer to a context object, initialized for signing + * (cannot be NULL) + * msg32: the 32-byte message hash assumed to be signed (cannot + * be NULL) + * sec32: the 32-byte private key (cannot be NULL) + * noncefp: pointer to a nonce generation function. If NULL, + * secp256k1_nonce_function_default is used + * noncedata: pointer to arbitrary data used by the nonce generation + * function (can be NULL) + * Out: pubnonce: public side of the nonce (cannot be NULL) + * privnonce32: private side of the nonce (32 byte) (cannot be NULL) + * + * Do not use the output as a private/public key pair for signing/validation. + */ +int secp256k1_schnorr_generate_nonce_pair( + const secp256k1_context_t* ctx, + const unsigned char *msg32, + const unsigned char *sec32, + secp256k1_nonce_function_t noncefp, + const void* noncedata, + secp256k1_pubkey_t *pubnonce, + unsigned char *privnonce32 +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(6) SECP256K1_ARG_NONNULL(7); + +/** Produce a partial Schnorr signature, which can be combined using + * secp256k1_schnorr_partial_combine, to end up with a full signature that is + * verifiable using secp256k1_schnorr_verify. + * Returns: 1: signature created succesfully. + * 0: no valid signature exists with this combination of keys, nonces + * and message (chance around 1 in 2^128) + * -1: invalid private key, nonce, or public nonces. 
+ * In: ctx: pointer to context object, initialized for signing (cannot + * be NULL) + * msg32: pointer to 32-byte message to sign + * sec32: pointer to 32-byte private key + * secnonce32: pointer to 32-byte array containing our nonce + * pubnonce_others: pointer to pubkey containing the sum of the other's + * nonces (see secp256k1_ec_pubkey_combine) + * Out: sig64: pointer to 64-byte array to put partial signature in + * + * The intended procedure for creating a multiparty signature is: + * - Each signer S[i] with private key x[i] and public key Q[i] runs + * secp256k1_schnorr_generate_nonce_pair to produce a pair (k[i],R[i]) of + * private/public nonces. + * - All signers communicate their public nonces to each other (revealing your + * private nonce can lead to discovery of your private key, so it should be + * considered secret). + * - All signers combine all the public nonces they received (excluding their + * own) using secp256k1_ec_pubkey_combine to obtain an + * Rall[i] = sum(R[0..i-1,i+1..n]). + * - All signers produce a partial signature using + * secp256k1_schnorr_partial_sign, passing in their own private key x[i], + * their own private nonce k[i], and the sum of the others' public nonces + * Rall[i]. + * - All signers communicate their partial signatures to each other. + * - Someone combines all partial signatures using + * secp256k1_schnorr_partial_combine, to obtain a full signature. + * - The resulting signature is validatable using secp256k1_schnorr_verify, with + * public key equal to the result of secp256k1_ec_pubkey_combine of the + * signers' public keys (sum(Q[0..n])). + * + * Note that secp256k1_schnorr_partial_combine and secp256k1_ec_pubkey_combine + * function take their arguments in any order, and it is possible to + * pre-combine several inputs already with one call, and add more inputs later + * by calling the function again (they are commutative and associative). + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_schnorr_partial_sign( + const secp256k1_context_t* ctx, + const unsigned char *msg32, + unsigned char *sig64, + const unsigned char *sec32, + const unsigned char *secnonce32, + const secp256k1_pubkey_t *pubnonce_others +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5) SECP256K1_ARG_NONNULL(6); + +/** Combine multiple Schnorr partial signatures. + * Returns: 1: the passed signatures were succesfully combined. 
+ * 0: the resulting signature is not valid (chance of 1 in 2^256) + * -1: some inputs were invalid, or the signatures were not created + * using the same set of nonces + * In: ctx: pointer to a context object + * sig64sin: pointer to an array of n pointers to 64-byte input + * signatures + * n: the number of signatures to combine (at least 1) + * Out: sig64: pointer to a 64-byte array to place the combined signature + * (cannot be NULL) + */ +SECP256K1_WARN_UNUSED_RESULT int secp256k1_schnorr_partial_combine( + const secp256k1_context_t* ctx, + unsigned char *sig64, + int n, + const unsigned char * const * sig64sin +) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4); + +# ifdef __cplusplus +} +# endif + +#endif diff --git a/bf/secp256k1/libsecp256k1.pc.in b/bf/secp256k1/libsecp256k1.pc.in new file mode 100644 index 0000000..1c72dd0 --- /dev/null +++ b/bf/secp256k1/libsecp256k1.pc.in @@ -0,0 +1,13 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libsecp256k1 +Description: Optimized C library for EC operations on curve secp256k1 +URL: https://github.com/bitcoin/secp256k1 +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir} +Libs.private: @SECP_LIBS@ +Libs: -L${libdir} -lsecp256k1 + diff --git a/bf/secp256k1/obj/.gitignore b/bf/secp256k1/obj/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/bf/secp256k1/src/basic-config.h b/bf/secp256k1/src/basic-config.h new file mode 100644 index 0000000..c4c16eb --- /dev/null +++ b/bf/secp256k1/src/basic-config.h @@ -0,0 +1,32 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_BASIC_CONFIG_ +#define _SECP256K1_BASIC_CONFIG_ + +#ifdef USE_BASIC_CONFIG + +#undef USE_ASM_X86_64 +#undef USE_ENDOMORPHISM +#undef USE_FIELD_10X26 +#undef USE_FIELD_5X52 +#undef USE_FIELD_INV_BUILTIN +#undef USE_FIELD_INV_NUM +#undef USE_NUM_GMP +#undef USE_NUM_NONE +#undef USE_SCALAR_4X64 +#undef USE_SCALAR_8X32 +#undef USE_SCALAR_INV_BUILTIN +#undef USE_SCALAR_INV_NUM + +#define USE_NUM_NONE 1 +#define USE_FIELD_INV_BUILTIN 1 +#define USE_SCALAR_INV_BUILTIN 1 +#define USE_FIELD_10X26 1 +#define USE_SCALAR_8X32 1 + +#endif // USE_BASIC_CONFIG +#endif // _SECP256K1_BASIC_CONFIG_ diff --git a/bf/secp256k1/src/bench.h b/bf/secp256k1/src/bench.h new file mode 100644 index 0000000..db5f68c --- /dev/null +++ b/bf/secp256k1/src/bench.h @@ -0,0 +1,56 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_BENCH_H_ +#define _SECP256K1_BENCH_H_ + +#include <stdio.h> +#include <math.h> +#include "sys/time.h" + +static double gettimedouble(void) { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_usec * 0.000001 + tv.tv_sec; +} + +void print_number(double x) { + double y = x; + int c = 0; + if (y < 0.0) y = -y; + while (y < 100.0) { + y *= 10.0; + c++; + } + printf("%.*f", c, x); +} + +void run_benchmark(char *name, void (*benchmark)(void*), void (*setup)(void*), void (*teardown)(void*), void* data, int count, int iter) { + int i; + 
double min = HUGE_VAL; + double sum = 0.0; + double max = 0.0; + for (i = 0; i < count; i++) { + double begin, total; + if (setup) setup(data); + begin = gettimedouble(); + benchmark(data); + total = gettimedouble() - begin; + if (teardown) teardown(data); + if (total < min) min = total; + if (total > max) max = total; + sum += total; + } + printf("%s: min ", name); + print_number(min * 1000000.0 / iter); + printf("us / avg "); + print_number((sum / count) * 1000000.0 / iter); + printf("us / max "); + print_number(max * 1000000.0 / iter); + printf("us\n"); +} + +#endif diff --git a/bf/secp256k1/src/bench_ecdh.c b/bf/secp256k1/src/bench_ecdh.c new file mode 100644 index 0000000..e28035a --- /dev/null +++ b/bf/secp256k1/src/bench_ecdh.c @@ -0,0 +1,51 @@ +/********************************************************************** + * Copyright (c) 2015 Pieter Wuille, Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#include + +#include "include/secp256k1.h" +#include "include/secp256k1_ecdh.h" +#include "util.h" +#include "bench.h" + +typedef struct { + secp256k1_context_t *ctx; + secp256k1_pubkey_t point; + unsigned char scalar[32]; +} bench_ecdh_t; + +static void bench_ecdh_setup(void* arg) { + int i; + bench_ecdh_t *data = (bench_ecdh_t*)arg; + const unsigned char point[] = { + 0x03, + 0x54, 0x94, 0xc1, 0x5d, 0x32, 0x09, 0x97, 0x06, + 0xc2, 0x39, 0x5f, 0x94, 0x34, 0x87, 0x45, 0xfd, + 0x75, 0x7c, 0xe3, 0x0e, 0x4e, 0x8c, 0x90, 0xfb, + 0xa2, 0xba, 0xd1, 0x84, 0xf8, 0x83, 0xc6, 0x9f + }; + + data->ctx = secp256k1_context_create(0); + for (i = 0; i < 32; i++) data->scalar[i] = i + 1; + CHECK(secp256k1_ec_pubkey_parse(data->ctx, &data->point, point, sizeof(point)) == 1); +} + +static void bench_ecdh(void* arg) { + int i; + unsigned char res[32]; + bench_ecdh_t *data = (bench_ecdh_t*)arg; + + for (i = 0; i < 20000; i++) { + CHECK(secp256k1_ecdh(data->ctx, res, &data->point, data->scalar) == 1); + } +} + +int main(void) { + bench_ecdh_t data; + + run_benchmark("ecdh", bench_ecdh, bench_ecdh_setup, NULL, &data, 10, 20000); + return 0; +} diff --git a/bf/secp256k1/src/bench_internal.c b/bf/secp256k1/src/bench_internal.c new file mode 100644 index 0000000..1a06e94 --- /dev/null +++ b/bf/secp256k1/src/bench_internal.c @@ -0,0 +1,331 @@ +/********************************************************************** + * Copyright (c) 2014-2015 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ +#include + +#include "include/secp256k1.h" + +#include "util.h" +#include "hash_impl.h" +#include "num_impl.h" +#include "field_impl.h" +#include "group_impl.h" +#include "scalar_impl.h" +#include "ecmult_const_impl.h" +#include "ecmult_impl.h" +#include "bench.h" + +typedef struct { + secp256k1_scalar_t scalar_x, scalar_y; + secp256k1_fe_t fe_x, fe_y; + secp256k1_ge_t ge_x, ge_y; + secp256k1_gej_t gej_x, gej_y; + unsigned char data[64]; + int wnaf[256]; +} bench_inv_t; + +void bench_setup(void* arg) { + bench_inv_t *data = (bench_inv_t*)arg; + + static const unsigned char init_x[32] = { + 0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13, + 0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35, + 0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59, + 0x61, 0x65, 0x67, 
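One easily missed contract of the bench.h harness above: the benchmark callback must itself loop iter times, because run_benchmark divides each measured run by the same iter value it is given, while setup is re-run before every one of the count timed runs. A hypothetical, self-contained benchmark showing that shape (the workload is meaningless and exists only to be timed; the file and names are made up, and bench.h is assumed to be on the include path):

#include <stdio.h>
#include "bench.h"

typedef struct {
    double acc;
} bench_example_t;

static void bench_example_setup(void* arg) {
    bench_example_t *data = (bench_example_t*)arg;
    data->acc = 1.0; /* re-initialised before each of the `count` timed runs */
}

static void bench_example(void* arg) {
    int i;
    bench_example_t *data = (bench_example_t*)arg;
    /* the body performs the same number of iterations that is passed to
     * run_benchmark as `iter` (20000 below), so the printed figures are
     * per-iteration times */
    for (i = 0; i < 20000; i++) {
        data->acc = data->acc * 1.000001 + 1.0;
    }
}

int main(void) {
    bench_example_t data;
    /* 10 timed runs of 20000 iterations each; min/avg/max are printed in us */
    run_benchmark("example", bench_example, bench_example_setup, NULL, &data, 10, 20000);
    printf("final accumulator: %f\n", data.acc); /* keep the work observable */
    return 0;
}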
0x6b, 0x6d, 0x71, 0x7f, 0x83 + }; + + static const unsigned char init_y[32] = { + 0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83, + 0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5, + 0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9, + 0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3 + }; + + secp256k1_scalar_set_b32(&data->scalar_x, init_x, NULL); + secp256k1_scalar_set_b32(&data->scalar_y, init_y, NULL); + secp256k1_fe_set_b32(&data->fe_x, init_x); + secp256k1_fe_set_b32(&data->fe_y, init_y); + CHECK(secp256k1_ge_set_xo_var(&data->ge_x, &data->fe_x, 0)); + CHECK(secp256k1_ge_set_xo_var(&data->ge_y, &data->fe_y, 1)); + secp256k1_gej_set_ge(&data->gej_x, &data->ge_x); + secp256k1_gej_set_ge(&data->gej_y, &data->ge_y); + memcpy(data->data, init_x, 32); + memcpy(data->data + 32, init_y, 32); +} + +void bench_scalar_add(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 2000000; i++) { + secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); + } +} + +void bench_scalar_negate(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 2000000; i++) { + secp256k1_scalar_negate(&data->scalar_x, &data->scalar_x); + } +} + +void bench_scalar_sqr(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 200000; i++) { + secp256k1_scalar_sqr(&data->scalar_x, &data->scalar_x); + } +} + +void bench_scalar_mul(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 200000; i++) { + secp256k1_scalar_mul(&data->scalar_x, &data->scalar_x, &data->scalar_y); + } +} + +#ifdef USE_ENDOMORPHISM +void bench_scalar_split(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 20000; i++) { + secp256k1_scalar_t l, r; + secp256k1_scalar_split_lambda(&l, &r, &data->scalar_x); + secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); + } +} +#endif + +void bench_scalar_inverse(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 2000; i++) { + secp256k1_scalar_inverse(&data->scalar_x, &data->scalar_x); + secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); + } +} + +void bench_scalar_inverse_var(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 2000; i++) { + secp256k1_scalar_inverse_var(&data->scalar_x, &data->scalar_x); + secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); + } +} + +void bench_field_normalize(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 2000000; i++) { + secp256k1_fe_normalize(&data->fe_x); + } +} + +void bench_field_normalize_weak(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 2000000; i++) { + secp256k1_fe_normalize_weak(&data->fe_x); + } +} + +void bench_field_mul(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 200000; i++) { + secp256k1_fe_mul(&data->fe_x, &data->fe_x, &data->fe_y); + } +} + +void bench_field_sqr(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 200000; i++) { + secp256k1_fe_sqr(&data->fe_x, &data->fe_x); + } +} + +void bench_field_inverse(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 20000; i++) { + secp256k1_fe_inv(&data->fe_x, &data->fe_x); + secp256k1_fe_add(&data->fe_x, &data->fe_y); + } +} + +void bench_field_inverse_var(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 20000; i++) { + 
secp256k1_fe_inv_var(&data->fe_x, &data->fe_x); + secp256k1_fe_add(&data->fe_x, &data->fe_y); + } +} + +void bench_field_sqrt_var(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 20000; i++) { + secp256k1_fe_sqrt_var(&data->fe_x, &data->fe_x); + secp256k1_fe_add(&data->fe_x, &data->fe_y); + } +} + +void bench_group_double_var(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 200000; i++) { + secp256k1_gej_double_var(&data->gej_x, &data->gej_x, NULL); + } +} + +void bench_group_add_var(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 200000; i++) { + secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y, NULL); + } +} + +void bench_group_add_affine(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 200000; i++) { + secp256k1_gej_add_ge(&data->gej_x, &data->gej_x, &data->ge_y); + } +} + +void bench_group_add_affine_var(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 200000; i++) { + secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y, NULL); + } +} + +void bench_ecmult_wnaf(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 20000; i++) { + secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A); + secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); + } +} + +void bench_wnaf_const(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + + for (i = 0; i < 20000; i++) { + secp256k1_wnaf_const(data->wnaf, data->scalar_x, WINDOW_A); + secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); + } +} + + +void bench_sha256(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + secp256k1_sha256_t sha; + + for (i = 0; i < 20000; i++) { + secp256k1_sha256_initialize(&sha); + secp256k1_sha256_write(&sha, data->data, 32); + secp256k1_sha256_finalize(&sha, data->data); + } +} + +void bench_hmac_sha256(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + secp256k1_hmac_sha256_t hmac; + + for (i = 0; i < 20000; i++) { + secp256k1_hmac_sha256_initialize(&hmac, data->data, 32); + secp256k1_hmac_sha256_write(&hmac, data->data, 32); + secp256k1_hmac_sha256_finalize(&hmac, data->data); + } +} + +void bench_rfc6979_hmac_sha256(void* arg) { + int i; + bench_inv_t *data = (bench_inv_t*)arg; + secp256k1_rfc6979_hmac_sha256_t rng; + + for (i = 0; i < 20000; i++) { + secp256k1_rfc6979_hmac_sha256_initialize(&rng, data->data, 64); + secp256k1_rfc6979_hmac_sha256_generate(&rng, data->data, 32); + } +} + + +int have_flag(int argc, char** argv, char *flag) { + char** argm = argv + argc; + argv++; + if (argv == argm) { + return 1; + } + while (argv != NULL && argv != argm) { + if (strcmp(*argv, flag) == 0) return 1; + argv++; + } + return 0; +} + +int main(int argc, char **argv) { + bench_inv_t data; + if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, 2000000); + if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, 2000000); + if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "sqr")) run_benchmark("scalar_sqr", bench_scalar_sqr, bench_setup, NULL, &data, 10, 200000); + if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, 200000); 
+#ifdef USE_ENDOMORPHISM + if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "split")) run_benchmark("scalar_split", bench_scalar_split, bench_setup, NULL, &data, 10, 20000); +#endif + if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, 2000); + if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, 2000); + + if (have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize", bench_field_normalize, bench_setup, NULL, &data, 10, 2000000); + if (have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize_weak", bench_field_normalize_weak, bench_setup, NULL, &data, 10, 2000000); + if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqr")) run_benchmark("field_sqr", bench_field_sqr, bench_setup, NULL, &data, 10, 200000); + if (have_flag(argc, argv, "field") || have_flag(argc, argv, "mul")) run_benchmark("field_mul", bench_field_mul, bench_setup, NULL, &data, 10, 200000); + if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, 20000); + if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, 20000); + if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_sqrt_var", bench_field_sqrt_var, bench_setup, NULL, &data, 10, 20000); + + if (have_flag(argc, argv, "group") || have_flag(argc, argv, "double")) run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, 200000); + if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_var", bench_group_add_var, bench_setup, NULL, &data, 10, 200000); + if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, 200000); + if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, 200000); + + if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, 20000); + if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, 20000); + + if (have_flag(argc, argv, "hash") || have_flag(argc, argv, "sha256")) run_benchmark("hash_sha256", bench_sha256, bench_setup, NULL, &data, 10, 20000); + if (have_flag(argc, argv, "hash") || have_flag(argc, argv, "hmac")) run_benchmark("hash_hmac_sha256", bench_hmac_sha256, bench_setup, NULL, &data, 10, 20000); + if (have_flag(argc, argv, "hash") || have_flag(argc, argv, "rng6979")) run_benchmark("hash_rfc6979_hmac_sha256", bench_rfc6979_hmac_sha256, bench_setup, NULL, &data, 10, 20000); + return 0; +} diff --git a/bf/secp256k1/src/bench_recover.c b/bf/secp256k1/src/bench_recover.c new file mode 100644 index 0000000..1b198ec --- /dev/null +++ b/bf/secp256k1/src/bench_recover.c @@ -0,0 +1,55 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, 
see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#include "include/secp256k1.h" +#include "util.h" +#include "bench.h" + +typedef struct { + secp256k1_context_t *ctx; + unsigned char msg[32]; + unsigned char sig[64]; +} bench_recover_t; + +void bench_recover(void* arg) { + int i; + bench_recover_t *data = (bench_recover_t*)arg; + secp256k1_pubkey_t pubkey; + unsigned char pubkeyc[33]; + + for (i = 0; i < 20000; i++) { + int j; + int pubkeylen = 33; + secp256k1_ecdsa_signature_t sig; + CHECK(secp256k1_ecdsa_signature_parse_compact(data->ctx, &sig, data->sig, i % 2)); + CHECK(secp256k1_ecdsa_recover(data->ctx, data->msg, &sig, &pubkey)); + CHECK(secp256k1_ec_pubkey_serialize(data->ctx, pubkeyc, &pubkeylen, &pubkey, 1)); + for (j = 0; j < 32; j++) { + data->sig[j + 32] = data->msg[j]; /* Move former message to S. */ + data->msg[j] = data->sig[j]; /* Move former R to message. */ + data->sig[j] = pubkeyc[j + 1]; /* Move recovered pubkey X coordinate to R (which must be a valid X coordinate). */ + } + } +} + +void bench_recover_setup(void* arg) { + int i; + bench_recover_t *data = (bench_recover_t*)arg; + + for (i = 0; i < 32; i++) data->msg[i] = 1 + i; + for (i = 0; i < 64; i++) data->sig[i] = 65 + i; +} + +int main(void) { + bench_recover_t data; + + data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_VERIFY); + + run_benchmark("ecdsa_recover", bench_recover, bench_recover_setup, NULL, &data, 10, 20000); + + secp256k1_context_destroy(data.ctx); + return 0; +} diff --git a/bf/secp256k1/src/bench_schnorr_verify.c b/bf/secp256k1/src/bench_schnorr_verify.c new file mode 100644 index 0000000..274b6e0 --- /dev/null +++ b/bf/secp256k1/src/bench_schnorr_verify.c @@ -0,0 +1,69 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#include +#include + +#include "include/secp256k1.h" +#include "include/secp256k1_schnorr.h" +#include "util.h" +#include "bench.h" + +typedef struct { + unsigned char key[32]; + unsigned char sig[64]; + unsigned char pubkey[33]; + int pubkeylen; +} benchmark_schnorr_sig_t; + +typedef struct { + secp256k1_context_t *ctx; + unsigned char msg[32]; + benchmark_schnorr_sig_t sigs[64]; + int numsigs; +} benchmark_schnorr_verify_t; + +static void benchmark_schnorr_init(void* arg) { + int i, k; + benchmark_schnorr_verify_t* data = (benchmark_schnorr_verify_t*)arg; + + for (i = 0; i < 32; i++) data->msg[i] = 1 + i; + for (k = 0; k < data->numsigs; k++) { + secp256k1_pubkey_t pubkey; + for (i = 0; i < 32; i++) data->sigs[k].key[i] = 33 + i + k; + secp256k1_schnorr_sign(data->ctx, data->msg, data->sigs[k].sig, data->sigs[k].key, NULL, NULL); + data->sigs[k].pubkeylen = 33; + CHECK(secp256k1_ec_pubkey_create(data->ctx, &pubkey, data->sigs[k].key)); + CHECK(secp256k1_ec_pubkey_serialize(data->ctx, data->sigs[k].pubkey, &data->sigs[k].pubkeylen, &pubkey, 1)); + } +} + +static void benchmark_schnorr_verify(void* arg) { + int i; + benchmark_schnorr_verify_t* data = (benchmark_schnorr_verify_t*)arg; + + for (i = 0; i < 20000 / data->numsigs; i++) { + secp256k1_pubkey_t pubkey; + data->sigs[0].sig[(i >> 8) % 64] ^= (i & 0xFF); + CHECK(secp256k1_ec_pubkey_parse(data->ctx, &pubkey, 
data->sigs[0].pubkey, data->sigs[0].pubkeylen)); + CHECK(secp256k1_schnorr_verify(data->ctx, data->msg, data->sigs[0].sig, &pubkey) == ((i & 0xFF) == 0)); + data->sigs[0].sig[(i >> 8) % 64] ^= (i & 0xFF); + } +} + + + +int main(void) { + benchmark_schnorr_verify_t data; + + data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); + + data.numsigs = 1; + run_benchmark("schnorr_verify", benchmark_schnorr_verify, benchmark_schnorr_init, NULL, &data, 10, 20000); + + secp256k1_context_destroy(data.ctx); + return 0; +} diff --git a/bf/secp256k1/src/bench_sign.c b/bf/secp256k1/src/bench_sign.c new file mode 100644 index 0000000..e625da0 --- /dev/null +++ b/bf/secp256k1/src/bench_sign.c @@ -0,0 +1,52 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#include "include/secp256k1.h" +#include "util.h" +#include "bench.h" + +typedef struct { + secp256k1_context_t* ctx; + unsigned char msg[32]; + unsigned char key[32]; +} bench_sign_t; + +static void bench_sign_setup(void* arg) { + int i; + bench_sign_t *data = (bench_sign_t*)arg; + + for (i = 0; i < 32; i++) data->msg[i] = i + 1; + for (i = 0; i < 32; i++) data->key[i] = i + 65; +} + +static void bench_sign(void* arg) { + int i; + bench_sign_t *data = (bench_sign_t*)arg; + + unsigned char sig[64]; + for (i = 0; i < 20000; i++) { + int j; + int recid = 0; + secp256k1_ecdsa_signature_t signature; + CHECK(secp256k1_ecdsa_sign(data->ctx, data->msg, &signature, data->key, NULL, NULL)); + CHECK(secp256k1_ecdsa_signature_serialize_compact(data->ctx, sig, &recid, &signature)); + for (j = 0; j < 32; j++) { + data->msg[j] = sig[j]; /* Move former R to message. */ + data->key[j] = sig[j + 32]; /* Move former S to key. 
*/ + } + } +} + +int main(void) { + bench_sign_t data; + + data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN); + + run_benchmark("ecdsa_sign", bench_sign, bench_sign_setup, NULL, &data, 10, 20000); + + secp256k1_context_destroy(data.ctx); + return 0; +} diff --git a/bf/secp256k1/src/bench_verify.c b/bf/secp256k1/src/bench_verify.c new file mode 100644 index 0000000..b8c5bdf --- /dev/null +++ b/bf/secp256k1/src/bench_verify.c @@ -0,0 +1,63 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#include +#include + +#include "include/secp256k1.h" +#include "util.h" +#include "bench.h" + +typedef struct { + secp256k1_context_t *ctx; + unsigned char msg[32]; + unsigned char key[32]; + unsigned char sig[72]; + int siglen; + unsigned char pubkey[33]; + int pubkeylen; +} benchmark_verify_t; + +static void benchmark_verify(void* arg) { + int i; + benchmark_verify_t* data = (benchmark_verify_t*)arg; + + for (i = 0; i < 20000; i++) { + secp256k1_pubkey_t pubkey; + secp256k1_ecdsa_signature_t sig; + data->sig[data->siglen - 1] ^= (i & 0xFF); + data->sig[data->siglen - 2] ^= ((i >> 8) & 0xFF); + data->sig[data->siglen - 3] ^= ((i >> 16) & 0xFF); + CHECK(secp256k1_ec_pubkey_parse(data->ctx, &pubkey, data->pubkey, data->pubkeylen) == 1); + CHECK(secp256k1_ecdsa_signature_parse_der(data->ctx, &sig, data->sig, data->siglen) == 1); + CHECK(secp256k1_ecdsa_verify(data->ctx, data->msg, &sig, &pubkey) == (i == 0)); + data->sig[data->siglen - 1] ^= (i & 0xFF); + data->sig[data->siglen - 2] ^= ((i >> 8) & 0xFF); + data->sig[data->siglen - 3] ^= ((i >> 16) & 0xFF); + } +} + +int main(void) { + int i; + secp256k1_pubkey_t pubkey; + secp256k1_ecdsa_signature_t sig; + benchmark_verify_t data; + + data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); + + for (i = 0; i < 32; i++) data.msg[i] = 1 + i; + for (i = 0; i < 32; i++) data.key[i] = 33 + i; + data.siglen = 72; + CHECK(secp256k1_ecdsa_sign(data.ctx, data.msg, &sig, data.key, NULL, NULL)); + CHECK(secp256k1_ecdsa_signature_serialize_der(data.ctx, data.sig, &data.siglen, &sig)); + CHECK(secp256k1_ec_pubkey_create(data.ctx, &pubkey, data.key)); + CHECK(secp256k1_ec_pubkey_serialize(data.ctx, data.pubkey, &data.pubkeylen, &pubkey, 1) == 1); + + run_benchmark("ecdsa_verify", benchmark_verify, NULL, NULL, &data, 10, 20000); + + secp256k1_context_destroy(data.ctx); + return 0; +} diff --git a/bf/secp256k1/src/ecdsa.h b/bf/secp256k1/src/ecdsa.h new file mode 100644 index 0000000..c361728 --- /dev/null +++ b/bf/secp256k1/src/ecdsa.h @@ -0,0 +1,20 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECDSA_ +#define _SECP256K1_ECDSA_ + +#include "scalar.h" +#include "group.h" +#include "ecmult.h" + +static int secp256k1_ecdsa_sig_parse(secp256k1_scalar_t *r, secp256k1_scalar_t *s, const unsigned char *sig, int size); +static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const secp256k1_scalar_t *r, const secp256k1_scalar_t *s); 
+static int secp256k1_ecdsa_sig_verify(const secp256k1_ecmult_context_t *ctx, const secp256k1_scalar_t* r, const secp256k1_scalar_t* s, const secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message); +static int secp256k1_ecdsa_sig_sign(const secp256k1_ecmult_gen_context_t *ctx, secp256k1_scalar_t* r, secp256k1_scalar_t* s, const secp256k1_scalar_t *seckey, const secp256k1_scalar_t *message, const secp256k1_scalar_t *nonce, int *recid); +static int secp256k1_ecdsa_sig_recover(const secp256k1_ecmult_context_t *ctx, const secp256k1_scalar_t* r, const secp256k1_scalar_t* s, secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message, int recid); + +#endif diff --git a/bf/secp256k1/src/ecdsa_impl.h b/bf/secp256k1/src/ecdsa_impl.h new file mode 100644 index 0000000..c0c44fa --- /dev/null +++ b/bf/secp256k1/src/ecdsa_impl.h @@ -0,0 +1,264 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + + +#ifndef _SECP256K1_ECDSA_IMPL_H_ +#define _SECP256K1_ECDSA_IMPL_H_ + +#include "scalar.h" +#include "field.h" +#include "group.h" +#include "ecmult.h" +#include "ecmult_gen.h" +#include "ecdsa.h" + +/** Group order for secp256k1 defined as 'n' in "Standards for Efficient Cryptography" (SEC2) 2.7.1 + * sage: for t in xrange(1023, -1, -1): + * .. p = 2**256 - 2**32 - t + * .. if p.is_prime(): + * .. print '%x'%p + * .. break + * 'fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f' + * sage: a = 0 + * sage: b = 7 + * sage: F = FiniteField (p) + * sage: '%x' % (EllipticCurve ([F (a), F (b)]).order()) + * 'fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141' + */ +static const secp256k1_fe_t secp256k1_ecdsa_const_order_as_fe = SECP256K1_FE_CONST( + 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL, + 0xBAAEDCE6UL, 0xAF48A03BUL, 0xBFD25E8CUL, 0xD0364141UL +); + +/** Difference between field and order, values 'p' and 'n' values defined in + * "Standards for Efficient Cryptography" (SEC2) 2.7.1. 
+ * sage: p = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F + * sage: a = 0 + * sage: b = 7 + * sage: F = FiniteField (p) + * sage: '%x' % (p - EllipticCurve ([F (a), F (b)]).order()) + * '14551231950b75fc4402da1722fc9baee' + */ +static const secp256k1_fe_t secp256k1_ecdsa_const_p_minus_order = SECP256K1_FE_CONST( + 0, 0, 0, 1, 0x45512319UL, 0x50B75FC4UL, 0x402DA172UL, 0x2FC9BAEEUL +); + +static int secp256k1_ecdsa_sig_parse(secp256k1_scalar_t *rr, secp256k1_scalar_t *rs, const unsigned char *sig, int size) { + unsigned char ra[32] = {0}, sa[32] = {0}; + const unsigned char *rp; + const unsigned char *sp; + int lenr; + int lens; + int overflow; + if (sig[0] != 0x30) { + return 0; + } + lenr = sig[3]; + if (5+lenr >= size) { + return 0; + } + lens = sig[lenr+5]; + if (sig[1] != lenr+lens+4) { + return 0; + } + if (lenr+lens+6 > size) { + return 0; + } + if (sig[2] != 0x02) { + return 0; + } + if (lenr == 0) { + return 0; + } + if (sig[lenr+4] != 0x02) { + return 0; + } + if (lens == 0) { + return 0; + } + sp = sig + 6 + lenr; + while (lens > 0 && sp[0] == 0) { + lens--; + sp++; + } + if (lens > 32) { + return 0; + } + rp = sig + 4; + while (lenr > 0 && rp[0] == 0) { + lenr--; + rp++; + } + if (lenr > 32) { + return 0; + } + memcpy(ra + 32 - lenr, rp, lenr); + memcpy(sa + 32 - lens, sp, lens); + overflow = 0; + secp256k1_scalar_set_b32(rr, ra, &overflow); + if (overflow) { + return 0; + } + secp256k1_scalar_set_b32(rs, sa, &overflow); + if (overflow) { + return 0; + } + return 1; +} + +static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const secp256k1_scalar_t* ar, const secp256k1_scalar_t* as) { + unsigned char r[33] = {0}, s[33] = {0}; + unsigned char *rp = r, *sp = s; + int lenR = 33, lenS = 33; + secp256k1_scalar_get_b32(&r[1], ar); + secp256k1_scalar_get_b32(&s[1], as); + while (lenR > 1 && rp[0] == 0 && rp[1] < 0x80) { lenR--; rp++; } + while (lenS > 1 && sp[0] == 0 && sp[1] < 0x80) { lenS--; sp++; } + if (*size < 6+lenS+lenR) { + *size = 6 + lenS + lenR; + return 0; + } + *size = 6 + lenS + lenR; + sig[0] = 0x30; + sig[1] = 4 + lenS + lenR; + sig[2] = 0x02; + sig[3] = lenR; + memcpy(sig+4, rp, lenR); + sig[4+lenR] = 0x02; + sig[5+lenR] = lenS; + memcpy(sig+lenR+6, sp, lenS); + return 1; +} + +static int secp256k1_ecdsa_sig_verify(const secp256k1_ecmult_context_t *ctx, const secp256k1_scalar_t *sigr, const secp256k1_scalar_t *sigs, const secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message) { + unsigned char c[32]; + secp256k1_scalar_t sn, u1, u2; + secp256k1_fe_t xr; + secp256k1_gej_t pubkeyj; + secp256k1_gej_t pr; + + if (secp256k1_scalar_is_zero(sigr) || secp256k1_scalar_is_zero(sigs)) { + return 0; + } + + secp256k1_scalar_inverse_var(&sn, sigs); + secp256k1_scalar_mul(&u1, &sn, message); + secp256k1_scalar_mul(&u2, &sn, sigr); + secp256k1_gej_set_ge(&pubkeyj, pubkey); + secp256k1_ecmult(ctx, &pr, &pubkeyj, &u2, &u1); + if (secp256k1_gej_is_infinity(&pr)) { + return 0; + } + secp256k1_scalar_get_b32(c, sigr); + secp256k1_fe_set_b32(&xr, c); + + /** We now have the recomputed R point in pr, and its claimed x coordinate (modulo n) + * in xr. Naively, we would extract the x coordinate from pr (requiring a inversion modulo p), + * compute the remainder modulo n, and compare it to xr. However: + * + * xr == X(pr) mod n + * <=> exists h. 
(xr + h * n < p && xr + h * n == X(pr)) + * [Since 2 * n > p, h can only be 0 or 1] + * <=> (xr == X(pr)) || (xr + n < p && xr + n == X(pr)) + * [In Jacobian coordinates, X(pr) is pr.x / pr.z^2 mod p] + * <=> (xr == pr.x / pr.z^2 mod p) || (xr + n < p && xr + n == pr.x / pr.z^2 mod p) + * [Multiplying both sides of the equations by pr.z^2 mod p] + * <=> (xr * pr.z^2 mod p == pr.x) || (xr + n < p && (xr + n) * pr.z^2 mod p == pr.x) + * + * Thus, we can avoid the inversion, but we have to check both cases separately. + * secp256k1_gej_eq_x implements the (xr * pr.z^2 mod p == pr.x) test. + */ + if (secp256k1_gej_eq_x_var(&xr, &pr)) { + /* xr.x == xr * xr.z^2 mod p, so the signature is valid. */ + return 1; + } + if (secp256k1_fe_cmp_var(&xr, &secp256k1_ecdsa_const_p_minus_order) >= 0) { + /* xr + p >= n, so we can skip testing the second case. */ + return 0; + } + secp256k1_fe_add(&xr, &secp256k1_ecdsa_const_order_as_fe); + if (secp256k1_gej_eq_x_var(&xr, &pr)) { + /* (xr + n) * pr.z^2 mod p == pr.x, so the signature is valid. */ + return 1; + } + return 0; +} + +static int secp256k1_ecdsa_sig_recover(const secp256k1_ecmult_context_t *ctx, const secp256k1_scalar_t *sigr, const secp256k1_scalar_t* sigs, secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message, int recid) { + unsigned char brx[32]; + secp256k1_fe_t fx; + secp256k1_ge_t x; + secp256k1_gej_t xj; + secp256k1_scalar_t rn, u1, u2; + secp256k1_gej_t qj; + + if (secp256k1_scalar_is_zero(sigr) || secp256k1_scalar_is_zero(sigs)) { + return 0; + } + + secp256k1_scalar_get_b32(brx, sigr); + VERIFY_CHECK(secp256k1_fe_set_b32(&fx, brx)); /* brx comes from a scalar, so is less than the order; certainly less than p */ + if (recid & 2) { + if (secp256k1_fe_cmp_var(&fx, &secp256k1_ecdsa_const_p_minus_order) >= 0) { + return 0; + } + secp256k1_fe_add(&fx, &secp256k1_ecdsa_const_order_as_fe); + } + if (!secp256k1_ge_set_xo_var(&x, &fx, recid & 1)) { + return 0; + } + secp256k1_gej_set_ge(&xj, &x); + secp256k1_scalar_inverse_var(&rn, sigr); + secp256k1_scalar_mul(&u1, &rn, message); + secp256k1_scalar_negate(&u1, &u1); + secp256k1_scalar_mul(&u2, &rn, sigs); + secp256k1_ecmult(ctx, &qj, &xj, &u2, &u1); + secp256k1_ge_set_gej_var(pubkey, &qj); + return !secp256k1_gej_is_infinity(&qj); +} + +static int secp256k1_ecdsa_sig_sign(const secp256k1_ecmult_gen_context_t *ctx, secp256k1_scalar_t *sigr, secp256k1_scalar_t *sigs, const secp256k1_scalar_t *seckey, const secp256k1_scalar_t *message, const secp256k1_scalar_t *nonce, int *recid) { + unsigned char b[32]; + secp256k1_gej_t rp; + secp256k1_ge_t r; + secp256k1_scalar_t n; + int overflow = 0; + + secp256k1_ecmult_gen(ctx, &rp, nonce); + secp256k1_ge_set_gej(&r, &rp); + secp256k1_fe_normalize(&r.x); + secp256k1_fe_normalize(&r.y); + secp256k1_fe_get_b32(b, &r.x); + secp256k1_scalar_set_b32(sigr, b, &overflow); + if (secp256k1_scalar_is_zero(sigr)) { + /* P.x = order is on the curve, so technically sig->r could end up zero, which would be an invalid signature. */ + secp256k1_gej_clear(&rp); + secp256k1_ge_clear(&r); + return 0; + } + if (recid) { + *recid = (overflow ? 2 : 0) | (secp256k1_fe_is_odd(&r.y) ? 
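The comparison trick documented in secp256k1_ecdsa_sig_verify can be sanity-checked with small numbers. The toy program below is illustrative only: it uses tiny primes instead of the real field prime and group order, but the same relationship n < p < 2*n holds, and it enumerates every case to confirm that the inversion-free test agrees with the definition xr == X mod n.

#include <assert.h>
#include <stdio.h>

int main(void) {
    const unsigned p = 23, n = 19; /* toy moduli with n < p < 2*n, p prime */
    unsigned X, z;
    for (X = 0; X < p; X++) {          /* affine x coordinate of the R point */
        for (z = 1; z < p; z++) {      /* any nonzero Jacobian denominator */
            unsigned zz = (z * z) % p;
            unsigned xj = (X * zz) % p; /* "Jacobian" numerator: X = xj / z^2 mod p */
            unsigned xr;
            for (xr = 0; xr < n; xr++) { /* candidate value stored in the signature */
                /* definition: xr must equal the affine X reduced mod n */
                int naive = (xr == X % n);
                /* inversion-free test from the comment above: since 2*n > p,
                 * the "wrap" offset h can only be 0 or 1 */
                int fast = ((xr * zz) % p == xj) ||
                           (xr + n < p && ((xr + n) * zz) % p == xj);
                assert(naive == fast);
            }
        }
    }
    printf("inversion-free x comparison matches the definition in all cases\n");
    return 0;
}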
1 : 0); + } + secp256k1_scalar_mul(&n, sigr, seckey); + secp256k1_scalar_add(&n, &n, message); + secp256k1_scalar_inverse(sigs, nonce); + secp256k1_scalar_mul(sigs, sigs, &n); + secp256k1_scalar_clear(&n); + secp256k1_gej_clear(&rp); + secp256k1_ge_clear(&r); + if (secp256k1_scalar_is_zero(sigs)) { + return 0; + } + if (secp256k1_scalar_is_high(sigs)) { + secp256k1_scalar_negate(sigs, sigs); + if (recid) { + *recid ^= 1; + } + } + return 1; +} + +#endif diff --git a/bf/secp256k1/src/eckey.h b/bf/secp256k1/src/eckey.h new file mode 100644 index 0000000..53b8184 --- /dev/null +++ b/bf/secp256k1/src/eckey.h @@ -0,0 +1,26 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECKEY_ +#define _SECP256K1_ECKEY_ + +#include "group.h" +#include "scalar.h" +#include "ecmult.h" +#include "ecmult_gen.h" + +static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size); +static int secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char *pub, int *size, int compressed); + +static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned char *privkey, int privkeylen); +static int secp256k1_eckey_privkey_serialize(const secp256k1_ecmult_gen_context_t *ctx, unsigned char *privkey, int *privkeylen, const secp256k1_scalar_t *key, int compressed); + +static int secp256k1_eckey_privkey_tweak_add(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak); +static int secp256k1_eckey_pubkey_tweak_add(const secp256k1_ecmult_context_t *ctx, secp256k1_ge_t *key, const secp256k1_scalar_t *tweak); +static int secp256k1_eckey_privkey_tweak_mul(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak); +static int secp256k1_eckey_pubkey_tweak_mul(const secp256k1_ecmult_context_t *ctx, secp256k1_ge_t *key, const secp256k1_scalar_t *tweak); + +#endif diff --git a/bf/secp256k1/src/eckey_impl.h b/bf/secp256k1/src/eckey_impl.h new file mode 100644 index 0000000..a332bd3 --- /dev/null +++ b/bf/secp256k1/src/eckey_impl.h @@ -0,0 +1,202 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECKEY_IMPL_H_ +#define _SECP256K1_ECKEY_IMPL_H_ + +#include "eckey.h" + +#include "scalar.h" +#include "field.h" +#include "group.h" +#include "ecmult_gen.h" + +static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size) { + if (size == 33 && (pub[0] == 0x02 || pub[0] == 0x03)) { + secp256k1_fe_t x; + return secp256k1_fe_set_b32(&x, pub+1) && secp256k1_ge_set_xo_var(elem, &x, pub[0] == 0x03); + } else if (size == 65 && (pub[0] == 0x04 || pub[0] == 0x06 || pub[0] == 0x07)) { + secp256k1_fe_t x, y; + if (!secp256k1_fe_set_b32(&x, pub+1) || !secp256k1_fe_set_b32(&y, pub+33)) { + return 0; + } + secp256k1_ge_set_xy(elem, &x, &y); + if ((pub[0] == 0x06 || pub[0] == 0x07) && secp256k1_fe_is_odd(&y) != (pub[0] == 0x07)) { + return 0; + } + return secp256k1_ge_is_valid_var(elem); + } else { + return 0; + } +} + +static int 
secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char *pub, int *size, int compressed) { + if (secp256k1_ge_is_infinity(elem)) { + return 0; + } + secp256k1_fe_normalize_var(&elem->x); + secp256k1_fe_normalize_var(&elem->y); + secp256k1_fe_get_b32(&pub[1], &elem->x); + if (compressed) { + *size = 33; + pub[0] = 0x02 | (secp256k1_fe_is_odd(&elem->y) ? 0x01 : 0x00); + } else { + *size = 65; + pub[0] = 0x04; + secp256k1_fe_get_b32(&pub[33], &elem->y); + } + return 1; +} + +static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned char *privkey, int privkeylen) { + unsigned char c[32] = {0}; + const unsigned char *end = privkey + privkeylen; + int lenb = 0; + int len = 0; + int overflow = 0; + /* sequence header */ + if (end < privkey+1 || *privkey != 0x30) { + return 0; + } + privkey++; + /* sequence length constructor */ + if (end < privkey+1 || !(*privkey & 0x80)) { + return 0; + } + lenb = *privkey & ~0x80; privkey++; + if (lenb < 1 || lenb > 2) { + return 0; + } + if (end < privkey+lenb) { + return 0; + } + /* sequence length */ + len = privkey[lenb-1] | (lenb > 1 ? privkey[lenb-2] << 8 : 0); + privkey += lenb; + if (end < privkey+len) { + return 0; + } + /* sequence element 0: version number (=1) */ + if (end < privkey+3 || privkey[0] != 0x02 || privkey[1] != 0x01 || privkey[2] != 0x01) { + return 0; + } + privkey += 3; + /* sequence element 1: octet string, up to 32 bytes */ + if (end < privkey+2 || privkey[0] != 0x04 || privkey[1] > 0x20 || end < privkey+2+privkey[1]) { + return 0; + } + memcpy(c + 32 - privkey[1], privkey + 2, privkey[1]); + secp256k1_scalar_set_b32(key, c, &overflow); + memset(c, 0, 32); + return !overflow; +} + +static int secp256k1_eckey_privkey_serialize(const secp256k1_ecmult_gen_context_t *ctx, unsigned char *privkey, int *privkeylen, const secp256k1_scalar_t *key, int compressed) { + secp256k1_gej_t rp; + secp256k1_ge_t r; + int pubkeylen = 0; + secp256k1_ecmult_gen(ctx, &rp, key); + secp256k1_ge_set_gej(&r, &rp); + if (compressed) { + static const unsigned char begin[] = { + 0x30,0x81,0xD3,0x02,0x01,0x01,0x04,0x20 + }; + static const unsigned char middle[] = { + 0xA0,0x81,0x85,0x30,0x81,0x82,0x02,0x01,0x01,0x30,0x2C,0x06,0x07,0x2A,0x86,0x48, + 0xCE,0x3D,0x01,0x01,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F,0x30,0x06,0x04,0x01,0x00,0x04,0x01,0x07,0x04, + 0x21,0x02,0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,0x55,0xA0,0x62,0x95,0xCE,0x87, + 0x0B,0x07,0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,0x59,0xF2,0x81,0x5B,0x16,0xF8, + 0x17,0x98,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFE,0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,0xBF,0xD2,0x5E, + 0x8C,0xD0,0x36,0x41,0x41,0x02,0x01,0x01,0xA1,0x24,0x03,0x22,0x00 + }; + unsigned char *ptr = privkey; + memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin); + secp256k1_scalar_get_b32(ptr, key); ptr += 32; + memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle); + if (!secp256k1_eckey_pubkey_serialize(&r, ptr, &pubkeylen, 1)) { + return 0; + } + ptr += pubkeylen; + *privkeylen = ptr - privkey; + } else { + static const unsigned char begin[] = { + 0x30,0x82,0x01,0x13,0x02,0x01,0x01,0x04,0x20 + }; + static const unsigned char middle[] = { + 0xA0,0x81,0xA5,0x30,0x81,0xA2,0x02,0x01,0x01,0x30,0x2C,0x06,0x07,0x2A,0x86,0x48, + 0xCE,0x3D,0x01,0x01,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F,0x30,0x06,0x04,0x01,0x00,0x04,0x01,0x07,0x04, + 0x41,0x04,0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,0x55,0xA0,0x62,0x95,0xCE,0x87, + 0x0B,0x07,0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,0x59,0xF2,0x81,0x5B,0x16,0xF8, + 0x17,0x98,0x48,0x3A,0xDA,0x77,0x26,0xA3,0xC4,0x65,0x5D,0xA4,0xFB,0xFC,0x0E,0x11, + 0x08,0xA8,0xFD,0x17,0xB4,0x48,0xA6,0x85,0x54,0x19,0x9C,0x47,0xD0,0x8F,0xFB,0x10, + 0xD4,0xB8,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFE,0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,0xBF,0xD2,0x5E, + 0x8C,0xD0,0x36,0x41,0x41,0x02,0x01,0x01,0xA1,0x44,0x03,0x42,0x00 + }; + unsigned char *ptr = privkey; + memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin); + secp256k1_scalar_get_b32(ptr, key); ptr += 32; + memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle); + if (!secp256k1_eckey_pubkey_serialize(&r, ptr, &pubkeylen, 0)) { + return 0; + } + ptr += pubkeylen; + *privkeylen = ptr - privkey; + } + return 1; +} + +static int secp256k1_eckey_privkey_tweak_add(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak) { + secp256k1_scalar_add(key, key, tweak); + if (secp256k1_scalar_is_zero(key)) { + return 0; + } + return 1; +} + +static int secp256k1_eckey_pubkey_tweak_add(const secp256k1_ecmult_context_t *ctx, secp256k1_ge_t *key, const secp256k1_scalar_t *tweak) { + secp256k1_gej_t pt; + secp256k1_scalar_t one; + secp256k1_gej_set_ge(&pt, key); + secp256k1_scalar_set_int(&one, 1); + secp256k1_ecmult(ctx, &pt, &pt, &one, tweak); + + if (secp256k1_gej_is_infinity(&pt)) { + return 0; + } + secp256k1_ge_set_gej(key, &pt); + return 1; +} + +static int secp256k1_eckey_privkey_tweak_mul(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak) { + if (secp256k1_scalar_is_zero(tweak)) { + return 0; + } + + secp256k1_scalar_mul(key, key, tweak); + return 1; +} + +static int secp256k1_eckey_pubkey_tweak_mul(const secp256k1_ecmult_context_t *ctx, secp256k1_ge_t *key, const secp256k1_scalar_t *tweak) { + secp256k1_scalar_t zero; + secp256k1_gej_t pt; + if (secp256k1_scalar_is_zero(tweak)) { + return 0; + } + + secp256k1_scalar_set_int(&zero, 0); + secp256k1_gej_set_ge(&pt, key); + secp256k1_ecmult(ctx, &pt, &pt, tweak, &zero); + secp256k1_ge_set_gej(key, &pt); + return 1; +} + +#endif diff --git a/bf/secp256k1/src/ecmult.h b/bf/secp256k1/src/ecmult.h new file mode 100644 index 0000000..f112cd9 --- /dev/null +++ b/bf/secp256k1/src/ecmult.h @@ -0,0 +1,31 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECMULT_ +#define _SECP256K1_ECMULT_ + +#include "num.h" +#include "group.h" + +typedef struct { + /* For accelerating the computation of a*P + b*G: */ + secp256k1_ge_storage_t (*pre_g)[]; /* odd multiples of the generator */ +#ifdef USE_ENDOMORPHISM + secp256k1_ge_storage_t (*pre_g_128)[]; /* odd multiples of 2^128*generator */ +#endif +} secp256k1_ecmult_context_t; + +static void secp256k1_ecmult_context_init(secp256k1_ecmult_context_t *ctx); +static void secp256k1_ecmult_context_build(secp256k1_ecmult_context_t *ctx, const callback_t *cb); +static void secp256k1_ecmult_context_clone(secp256k1_ecmult_context_t *dst, + const 
secp256k1_ecmult_context_t *src, const callback_t *cb); +static void secp256k1_ecmult_context_clear(secp256k1_ecmult_context_t *ctx); +static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context_t *ctx); + +/** Double multiply: R = na*A + ng*G */ +static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_scalar_t *na, const secp256k1_scalar_t *ng); + +#endif diff --git a/bf/secp256k1/src/ecmult_const.h b/bf/secp256k1/src/ecmult_const.h new file mode 100644 index 0000000..e8ff1fe --- /dev/null +++ b/bf/secp256k1/src/ecmult_const.h @@ -0,0 +1,15 @@ +/********************************************************************** + * Copyright (c) 2015 Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECMULT_CONST_ +#define _SECP256K1_ECMULT_CONST_ + +#include "scalar.h" +#include "group.h" + +static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a, const secp256k1_scalar_t *q); + +#endif diff --git a/bf/secp256k1/src/ecmult_const_impl.h b/bf/secp256k1/src/ecmult_const_impl.h new file mode 100644 index 0000000..62406a2 --- /dev/null +++ b/bf/secp256k1/src/ecmult_const_impl.h @@ -0,0 +1,258 @@ +/********************************************************************** + * Copyright (c) 2015 Pieter Wuille, Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECMULT_CONST_IMPL_ +#define _SECP256K1_ECMULT_CONST_IMPL_ + +#include "scalar.h" +#include "group.h" +#include "ecmult_const.h" +#include "ecmult_impl.h" + +#ifdef USE_ENDOMORPHISM + #define WNAF_BITS 128 +#else + #define WNAF_BITS 256 +#endif +#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w)) + +/* This is like `ECMULT_TABLE_GET_GE` but is constant time */ +#define ECMULT_CONST_TABLE_GET_GE(r,pre,n,w) do { \ + int m; \ + int abs_n = (n) * (((n) > 0) * 2 - 1); \ + int idx_n = abs_n / 2; \ + secp256k1_fe_t neg_y; \ + VERIFY_CHECK(((n) & 1) == 1); \ + VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \ + VERIFY_CHECK((n) <= ((1 << ((w)-1)) - 1)); \ + VERIFY_SETUP(secp256k1_fe_clear(&(r)->x)); \ + VERIFY_SETUP(secp256k1_fe_clear(&(r)->y)); \ + for (m = 0; m < ECMULT_TABLE_SIZE(w); m++) { \ + /* This loop is used to avoid secret data in array indices. See + * the comment in ecmult_gen_impl.h for rationale. */ \ + secp256k1_fe_cmov(&(r)->x, &(pre)[m].x, m == idx_n); \ + secp256k1_fe_cmov(&(r)->y, &(pre)[m].y, m == idx_n); \ + } \ + (r)->infinity = 0; \ + secp256k1_fe_negate(&neg_y, &(r)->y, 1); \ + secp256k1_fe_cmov(&(r)->y, &neg_y, (n) != abs_n); \ +} while(0) + + +/** Convert a number to WNAF notation. The number becomes represented by sum(2^{wi} * wnaf[i], i=0..return_val) + * with the following guarantees: + * - each wnaf[i] an odd integer between -(1 << w) and (1 << w) + * - each wnaf[i] is nonzero + * - the number of words set is returned; this is always (WNAF_BITS + w - 1) / w + * + * Adapted from `The Width-w NAF Method Provides Small Memory and Fast Elliptic Scalar + * Multiplications Secure against Side Channel Attacks`, Okeya and Tagaki. M. Joye (Ed.) + * CT-RSA 2003, LNCS 2612, pp. 328-443, 2003. 
Springer-Verlagy Berlin Heidelberg 2003 + * + * Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 335 + */ +static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar_t s, int w) { + int global_sign = 1; + int skew = 0; + int word = 0; + /* 1 2 3 */ + int u_last; + int u; + +#ifdef USE_ENDOMORPHISM + /* If we are using the endomorphism, we cannot handle even numbers by negating + * them, since we are working with 128-bit numbers whose negations would be 256 + * bits, eliminating the performance advantage. Instead we use a technique from + * Section 4.2 of the Okeya/Tagaki paper, which is to add either 1 (for even) + * or 2 (for odd) to the number we are encoding, then compensating after the + * multiplication. */ + /* Negative 128-bit numbers will be negated, since otherwise they are 256-bit */ + int flip = secp256k1_scalar_is_high(&s); + /* We add 1 to even numbers, 2 to odd ones, noting that negation flips parity */ + int bit = flip ^ (s.d[0] & 1); + /* We check for negative one, since adding 2 to it will cause an overflow */ + secp256k1_scalar_t neg_s; + int not_neg_one; + secp256k1_scalar_negate(&neg_s, &s); + not_neg_one = !secp256k1_scalar_is_one(&neg_s); + secp256k1_scalar_cadd_bit(&s, bit, not_neg_one); + /* If we had negative one, flip == 1, s.d[0] == 0, bit == 1, so caller expects + * that we added two to it and flipped it. In fact for -1 these operations are + * identical. We only flipped, but since skewing is required (in the sense that + * the skew must be 1 or 2, never zero) and flipping is not, we need to change + * our flags to claim that we only skewed. */ + global_sign = secp256k1_scalar_cond_negate(&s, flip); + global_sign *= not_neg_one * 2 - 1; + skew = 1 << bit; +#else + /* Otherwise, we just negate to force oddness */ + int is_even = secp256k1_scalar_is_even(&s); + global_sign = secp256k1_scalar_cond_negate(&s, is_even); +#endif + + /* 4 */ + u_last = secp256k1_scalar_shr_int(&s, w); + while (word * w < WNAF_BITS) { + int sign; + int even; + + /* 4.1 4.4 */ + u = secp256k1_scalar_shr_int(&s, w); + /* 4.2 */ + even = ((u & 1) == 0); + sign = 2 * (u_last > 0) - 1; + u += sign * even; + u_last -= sign * even * (1 << w); + + /* 4.3, adapted for global sign change */ + wnaf[word++] = u_last * global_sign; + + u_last = u; + } + wnaf[word] = u * global_sign; + + VERIFY_CHECK(secp256k1_scalar_is_zero(&s)); + VERIFY_CHECK(word == WNAF_SIZE(w)); + return skew; +} + + +static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a, const secp256k1_scalar_t *scalar) { + secp256k1_ge_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)]; + secp256k1_ge_t tmpa; + secp256k1_fe_t Z; + +#ifdef USE_ENDOMORPHISM + secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)]; + int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)]; + int wnaf_lam[1 + WNAF_SIZE(WINDOW_A - 1)]; + int skew_1; + int skew_lam; + secp256k1_scalar_t q_1, q_lam; +#else + int wnaf[1 + WNAF_SIZE(WINDOW_A - 1)]; +#endif + + int i; + secp256k1_scalar_t sc = *scalar; + + /* build wnaf representation for q. 
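The digit recoding documented above is easier to follow on a small integer. The following stand-alone sketch ports the same borrow-based idea to an odd 32-bit value; it deliberately omits the endomorphism split and the negation/skew handling of secp256k1_wnaf_const, the helper name and example value are made up, and the input must be odd. It checks that the fixed-length odd signed digits reconstruct the input.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BITS 32

/* Recode an odd 32-bit value into fixed-length signed digits base 2^w, every
 * digit odd (hence nonzero); returns the number of digits written. */
static int wnaf_fixed(int *wnaf, uint32_t s, int w) {
    int word = 0;
    int u_last = (int)(s & ((1u << w) - 1)); /* lowest w bits, odd since s is odd */
    int u = 0;
    s >>= w;
    while (word * w < BITS) {
        int even, sign;
        u = (int)(s & ((1u << w) - 1)); /* next w bits */
        s >>= w;
        even = ((u & 1) == 0);
        /* if the next digit is even, borrow 2^w from it so the current digit
         * stays odd; the value is unchanged because the borrow is compensated */
        sign = 2 * (u_last > 0) - 1;
        u += sign * even;
        u_last -= sign * even * (1 << w);
        wnaf[word++] = u_last;
        u_last = u;
    }
    wnaf[word] = u;
    return word + 1;
}

int main(void) {
    int wnaf[BITS + 1];
    int w = 5, i, n;
    int64_t acc = 0;
    uint32_t v = 0x1db54d35u; /* arbitrary odd example value */
    n = wnaf_fixed(wnaf, v, w);
    for (i = n - 1; i >= 0; i--) {
        assert(wnaf[i] & 1);            /* every digit is odd, hence nonzero */
        acc = acc * (1 << w) + wnaf[i]; /* acc = sum(wnaf[i] * 2^(w*i)) */
    }
    assert(acc == (int64_t)v);
    printf("odd fixed-length digits reconstruct the value\n");
    return 0;
}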
*/ +#ifdef USE_ENDOMORPHISM + /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */ + secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc); + /* no need for zero correction when using endomorphism since even + * numbers have one added to them anyway */ + skew_1 = secp256k1_wnaf_const(wnaf_1, q_1, WINDOW_A - 1); + skew_lam = secp256k1_wnaf_const(wnaf_lam, q_lam, WINDOW_A - 1); +#else + int is_zero = secp256k1_scalar_is_zero(scalar); + /* the wNAF ladder cannot handle zero, so bump this to one .. we will + * correct the result after the fact */ + sc.d[0] += is_zero; + VERIFY_CHECK(!secp256k1_scalar_is_zero(&sc)); + + secp256k1_wnaf_const(wnaf, sc, WINDOW_A - 1); +#endif + + /* Calculate odd multiples of a. + * All multiples are brought to the same Z 'denominator', which is stored + * in Z. Due to secp256k1' isomorphism we can do all operations pretending + * that the Z coordinate was 1, use affine addition formulae, and correct + * the Z coordinate of the result once at the end. + */ + secp256k1_gej_set_ge(r, a); + secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, r); + for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) { + secp256k1_fe_normalize_weak(&pre_a[i].y); + } +#ifdef USE_ENDOMORPHISM + for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) { + secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]); + } +#endif + + /* first loop iteration (separated out so we can directly set r, rather + * than having it start at infinity, get doubled several times, then have + * its new value added to it) */ +#ifdef USE_ENDOMORPHISM + i = wnaf_1[WNAF_SIZE(WINDOW_A - 1)]; + VERIFY_CHECK(i != 0); + ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A); + secp256k1_gej_set_ge(r, &tmpa); + + i = wnaf_lam[WNAF_SIZE(WINDOW_A - 1)]; + VERIFY_CHECK(i != 0); + ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, i, WINDOW_A); + secp256k1_gej_add_ge(r, r, &tmpa); +#else + i = wnaf[WNAF_SIZE(WINDOW_A - 1)]; + VERIFY_CHECK(i != 0); + ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A); + secp256k1_gej_set_ge(r, &tmpa); +#endif + /* remaining loop iterations */ + for (i = WNAF_SIZE(WINDOW_A - 1) - 1; i >= 0; i--) { + int n; + int j; + for (j = 0; j < WINDOW_A - 1; ++j) { + secp256k1_gej_double_nonzero(r, r, NULL); + } +#ifdef USE_ENDOMORPHISM + n = wnaf_1[i]; + ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); + VERIFY_CHECK(n != 0); + secp256k1_gej_add_ge(r, r, &tmpa); + + n = wnaf_lam[i]; + ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A); + VERIFY_CHECK(n != 0); + secp256k1_gej_add_ge(r, r, &tmpa); +#else + n = wnaf[i]; + VERIFY_CHECK(n != 0); + ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); + secp256k1_gej_add_ge(r, r, &tmpa); +#endif + } + + secp256k1_fe_mul(&r->z, &r->z, &Z); + +#ifdef USE_ENDOMORPHISM + { + /* Correct for wNAF skew */ + secp256k1_ge_t correction = *a; + secp256k1_ge_storage_t correction_1_stor; + secp256k1_ge_storage_t correction_lam_stor; + secp256k1_ge_storage_t a2_stor; + secp256k1_gej_t tmpj; + secp256k1_gej_set_ge(&tmpj, &correction); + secp256k1_gej_double_var(&tmpj, &tmpj, NULL); + secp256k1_ge_set_gej(&correction, &tmpj); + secp256k1_ge_to_storage(&correction_1_stor, a); + secp256k1_ge_to_storage(&correction_lam_stor, a); + secp256k1_ge_to_storage(&a2_stor, &correction); + + /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */ + secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2); + secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2); + + /* Apply the correction */ + 
secp256k1_ge_from_storage(&correction, &correction_1_stor); + secp256k1_ge_neg(&correction, &correction); + secp256k1_gej_add_ge(r, r, &correction); + + secp256k1_ge_from_storage(&correction, &correction_lam_stor); + secp256k1_ge_neg(&correction, &correction); + secp256k1_ge_mul_lambda(&correction, &correction); + secp256k1_gej_add_ge(r, r, &correction); + } +#else + /* correct for zero */ + r->infinity |= is_zero; +#endif +} + +#endif diff --git a/bf/secp256k1/src/ecmult_gen.h b/bf/secp256k1/src/ecmult_gen.h new file mode 100644 index 0000000..a9419e4 --- /dev/null +++ b/bf/secp256k1/src/ecmult_gen.h @@ -0,0 +1,43 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECMULT_GEN_ +#define _SECP256K1_ECMULT_GEN_ + +#include "scalar.h" +#include "group.h" + +typedef struct { + /* For accelerating the computation of a*G: + * To harden against timing attacks, use the following mechanism: + * * Break up the multiplicand into groups of 4 bits, called n_0, n_1, n_2, ..., n_63. + * * Compute sum(n_i * 16^i * G + U_i, i=0..63), where: + * * U_i = U * 2^i (for i=0..62) + * * U_i = U * (1-2^63) (for i=63) + * where U is a point with no known corresponding scalar. Note that sum(U_i, i=0..63) = 0. + * For each i, and each of the 16 possible values of n_i, (n_i * 16^i * G + U_i) is + * precomputed (call it prec(i, n_i)). The formula now becomes sum(prec(i, n_i), i=0..63). + * None of the resulting prec group elements have a known scalar, and neither do any of + * the intermediate sums while computing a*G. 
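The table layout described in this comment can be modelled with ordinary integers, since only additive bookkeeping is involved. In the illustrative sketch below, the group of points is replaced by integers with addition modulo 2^64, so "i*G" becomes i*g; the structure is the same: one precomputed entry per 4-bit window of the scalar, plus per-window offsets that cancel in the final sum. The generator, offsets, and scalar are arbitrary example values.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define WINDOWS 8 /* 8 windows of 4 bits: a 32-bit toy scalar */

int main(void) {
    uint64_t prec[WINDOWS][16];
    uint64_t u[WINDOWS];
    uint64_t g = 0x9e3779b97f4a7c15u; /* arbitrary "generator" */
    uint64_t sum_u = 0, acc = 0, pow16 = 1;
    uint32_t a = 0xdeadbeefu;         /* toy scalar */
    int i, j;

    /* offsets u_j with no special structure, chosen so that they sum to zero */
    for (j = 0; j < WINDOWS - 1; j++) {
        u[j] = 0x123456789abcdef1u * (uint64_t)(j + 1);
        sum_u += u[j];
    }
    u[WINDOWS - 1] = (uint64_t)0 - sum_u; /* forces sum(u_j) == 0 (mod 2^64) */

    /* prec[j][i] = 16^j * i * g + u_j, mirroring prec[j][i] = 16^j * i * G + U_j */
    for (j = 0; j < WINDOWS; j++) {
        for (i = 0; i < 16; i++) {
            prec[j][i] = pow16 * (uint64_t)i * g + u[j];
        }
        pow16 *= 16;
    }

    /* a*g equals the sum of one table entry per 4-bit window n_j of a */
    for (j = 0; j < WINDOWS; j++) {
        acc += prec[j][(a >> (4 * j)) & 0xF];
    }
    assert(acc == (uint64_t)a * g);
    printf("per-window table lookups sum to a*g\n");
    return 0;
}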
+ */ + secp256k1_ge_storage_t (*prec)[64][16]; /* prec[j][i] = 16^j * i * G + U_i */ + secp256k1_scalar_t blind; + secp256k1_gej_t initial; +} secp256k1_ecmult_gen_context_t; + +static void secp256k1_ecmult_gen_context_init(secp256k1_ecmult_gen_context_t* ctx); +static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context_t* ctx, const callback_t* cb); +static void secp256k1_ecmult_gen_context_clone(secp256k1_ecmult_gen_context_t *dst, + const secp256k1_ecmult_gen_context_t* src, const callback_t* cb); +static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context_t* ctx); +static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_context_t* ctx); + +/** Multiply with the generator: R = a*G */ +static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context_t* ctx, secp256k1_gej_t *r, const secp256k1_scalar_t *a); + +static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context_t *ctx, const unsigned char *seed32); + +#endif diff --git a/bf/secp256k1/src/ecmult_gen_impl.h b/bf/secp256k1/src/ecmult_gen_impl.h new file mode 100644 index 0000000..2397abf --- /dev/null +++ b/bf/secp256k1/src/ecmult_gen_impl.h @@ -0,0 +1,205 @@ +/********************************************************************** + * Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECMULT_GEN_IMPL_H_ +#define _SECP256K1_ECMULT_GEN_IMPL_H_ + +#include "scalar.h" +#include "group.h" +#include "ecmult_gen.h" +#include "hash_impl.h" +#ifdef USE_ECMULT_STATIC_PRECOMPUTATION +#include "ecmult_static_context.h" +#endif +static void secp256k1_ecmult_gen_context_init(secp256k1_ecmult_gen_context_t *ctx) { + ctx->prec = NULL; +} + +static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context_t *ctx, const callback_t* cb) { +#ifndef USE_ECMULT_STATIC_PRECOMPUTATION + secp256k1_ge_t prec[1024]; + secp256k1_gej_t gj; + secp256k1_gej_t nums_gej; + int i, j; +#endif + + if (ctx->prec != NULL) { + return; + } +#ifndef USE_ECMULT_STATIC_PRECOMPUTATION + ctx->prec = (secp256k1_ge_storage_t (*)[64][16])checked_malloc(cb, sizeof(*ctx->prec)); + + /* get the generator */ + secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g); + + /* Construct a group element with no known corresponding scalar (nothing up my sleeve). */ + { + static const unsigned char nums_b32[33] = "The scalar for this x is unknown"; + secp256k1_fe_t nums_x; + secp256k1_ge_t nums_ge; + VERIFY_CHECK(secp256k1_fe_set_b32(&nums_x, nums_b32)); + VERIFY_CHECK(secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0)); + secp256k1_gej_set_ge(&nums_gej, &nums_ge); + /* Add G to make the bits in x uniformly distributed. */ + secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, &secp256k1_ge_const_g, NULL); + } + + /* compute prec. */ + { + secp256k1_gej_t precj[1024]; /* Jacobian versions of prec. */ + secp256k1_gej_t gbase; + secp256k1_gej_t numsbase; + gbase = gj; /* 16^j * G */ + numsbase = nums_gej; /* 2^j * nums. */ + for (j = 0; j < 64; j++) { + /* Set precj[j*16 .. j*16+15] to (numsbase, numsbase + gbase, ..., numsbase + 15*gbase). */ + precj[j*16] = numsbase; + for (i = 1; i < 16; i++) { + secp256k1_gej_add_var(&precj[j*16 + i], &precj[j*16 + i - 1], &gbase, NULL); + } + /* Multiply gbase by 16. 
*/ + for (i = 0; i < 4; i++) { + secp256k1_gej_double_var(&gbase, &gbase, NULL); + } + /* Multiply numbase by 2. */ + secp256k1_gej_double_var(&numsbase, &numsbase, NULL); + if (j == 62) { + /* In the last iteration, numsbase is (1 - 2^j) * nums instead. */ + secp256k1_gej_neg(&numsbase, &numsbase); + secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL); + } + } + secp256k1_ge_set_all_gej_var(1024, prec, precj, cb); + } + for (j = 0; j < 64; j++) { + for (i = 0; i < 16; i++) { + secp256k1_ge_to_storage(&(*ctx->prec)[j][i], &prec[j*16 + i]); + } + } +#else + (void)cb; + ctx->prec = (secp256k1_ge_storage_t (*)[64][16])secp256k1_ecmult_static_context; +#endif + secp256k1_ecmult_gen_blind(ctx, NULL); +} + +static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_context_t* ctx) { + return ctx->prec != NULL; +} + +static void secp256k1_ecmult_gen_context_clone(secp256k1_ecmult_gen_context_t *dst, + const secp256k1_ecmult_gen_context_t *src, const callback_t* cb) { + if (src->prec == NULL) { + dst->prec = NULL; + } else { +#ifndef USE_ECMULT_STATIC_PRECOMPUTATION + dst->prec = (secp256k1_ge_storage_t (*)[64][16])checked_malloc(cb, sizeof(*dst->prec)); + memcpy(dst->prec, src->prec, sizeof(*dst->prec)); +#else + (void)cb; + dst->prec = src->prec; +#endif + dst->initial = src->initial; + dst->blind = src->blind; + } +} + +static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context_t *ctx) { +#ifndef USE_ECMULT_STATIC_PRECOMPUTATION + free(ctx->prec); +#endif + secp256k1_scalar_clear(&ctx->blind); + secp256k1_gej_clear(&ctx->initial); + ctx->prec = NULL; +} + +static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context_t *ctx, secp256k1_gej_t *r, const secp256k1_scalar_t *gn) { + secp256k1_ge_t add; + secp256k1_ge_storage_t adds; + secp256k1_scalar_t gnb; + int bits; + int i, j; + memset(&adds, 0, sizeof(adds)); + *r = ctx->initial; + /* Blind scalar/point multiplication by computing (n-b)G + bG instead of nG. */ + secp256k1_scalar_add(&gnb, gn, &ctx->blind); + add.infinity = 0; + for (j = 0; j < 64; j++) { + bits = secp256k1_scalar_get_bits(&gnb, j * 4, 4); + for (i = 0; i < 16; i++) { + /** This uses a conditional move to avoid any secret data in array indexes. + * _Any_ use of secret indexes has been demonstrated to result in timing + * sidechannels, even when the cache-line access patterns are uniform. + * See also: + * "A word of warning", CHES 2013 Rump Session, by Daniel J. Bernstein and Peter Schwabe + * (https://cryptojedi.org/peter/data/chesrump-20130822.pdf) and + * "Cache Attacks and Countermeasures: the Case of AES", RSA 2006, + * by Dag Arne Osvik, Adi Shamir, and Eran Tromer + * (http://www.tau.ac.il/~tromer/papers/cache.pdf) + */ + secp256k1_ge_storage_cmov(&adds, &(*ctx->prec)[j][i], i == bits); + } + secp256k1_ge_from_storage(&add, &adds); + secp256k1_gej_add_ge(r, r, &add); + } + bits = 0; + secp256k1_ge_clear(&add); + secp256k1_scalar_clear(&gnb); +} + +/* Setup blinding values for secp256k1_ecmult_gen. */ +static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context_t *ctx, const unsigned char *seed32) { + secp256k1_scalar_t b; + secp256k1_gej_t gb; + secp256k1_fe_t s; + unsigned char nonce32[32]; + secp256k1_rfc6979_hmac_sha256_t rng; + int retry; + unsigned char keydata[64] = {0}; + if (!seed32) { + /* When seed is NULL, reset the initial point and blinding value. 
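+     * (With blind = 1 and initial = -G, secp256k1_ecmult_gen still computes the
+     * correct result: initial + (n + blind)*G = -G + (n + 1)*G = n*G; it merely
+     * provides no blinding.)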
*/ + secp256k1_gej_set_ge(&ctx->initial, &secp256k1_ge_const_g); + secp256k1_gej_neg(&ctx->initial, &ctx->initial); + secp256k1_scalar_set_int(&ctx->blind, 1); + } + /* The prior blinding value (if not reset) is chained forward by including it in the hash. */ + secp256k1_scalar_get_b32(nonce32, &ctx->blind); + /** Using a CSPRNG allows a failure free interface, avoids needing large amounts of random data, + * and guards against weak or adversarial seeds. This is a simpler and safer interface than + * asking the caller for blinding values directly and expecting them to retry on failure. + */ + memcpy(keydata, nonce32, 32); + if (seed32) { + memcpy(keydata + 32, seed32, 32); + } + secp256k1_rfc6979_hmac_sha256_initialize(&rng, keydata, seed32 ? 64 : 32); + memset(keydata, 0, sizeof(keydata)); + /* Retry for out of range results to achieve uniformity. */ + do { + secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32); + retry = !secp256k1_fe_set_b32(&s, nonce32); + retry |= secp256k1_fe_is_zero(&s); + } while (retry); + /* Randomize the projection to defend against multiplier sidechannels. */ + secp256k1_gej_rescale(&ctx->initial, &s); + secp256k1_fe_clear(&s); + do { + secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32); + secp256k1_scalar_set_b32(&b, nonce32, &retry); + /* A blinding value of 0 works, but would undermine the projection hardening. */ + retry |= secp256k1_scalar_is_zero(&b); + } while (retry); + secp256k1_rfc6979_hmac_sha256_finalize(&rng); + memset(nonce32, 0, 32); + secp256k1_ecmult_gen(ctx, &gb, &b); + secp256k1_scalar_negate(&b, &b); + ctx->blind = b; + ctx->initial = gb; + secp256k1_scalar_clear(&b); + secp256k1_gej_clear(&gb); +} + +#endif diff --git a/bf/secp256k1/src/ecmult_impl.h b/bf/secp256k1/src/ecmult_impl.h new file mode 100644 index 0000000..d6da220 --- /dev/null +++ b/bf/secp256k1/src/ecmult_impl.h @@ -0,0 +1,389 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_ECMULT_IMPL_H_ +#define _SECP256K1_ECMULT_IMPL_H_ + +#include "group.h" +#include "scalar.h" +#include "ecmult.h" + +/* optimal for 128-bit and 256-bit exponents. */ +#define WINDOW_A 5 + +/** larger numbers may result in slightly better performance, at the cost of + exponentially larger precomputed tables. */ +#ifdef USE_ENDOMORPHISM +/** Two tables for window size 15: 1.375 MiB. */ +#define WINDOW_G 15 +#else +/** One table for window size 16: 1.375 MiB. */ +#define WINDOW_G 16 +#endif + +/** The number of entries a table with precomputed multiples needs to have. */ +#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2)) + +/** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain + * the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will + * contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z. + * Prej's Z values are undefined, except for the last value. 
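+ * For example, when filling the WINDOW_A table, n = ECMULT_TABLE_SIZE(WINDOW_A)
+ * = 8, so prej ends up holding [1*a, 3*a, 5*a, ..., 15*a].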
+ */ +static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej_t *prej, secp256k1_fe_t *zr, const secp256k1_gej_t *a) { + secp256k1_gej_t d; + secp256k1_ge_t a_ge, d_ge; + int i; + + VERIFY_CHECK(!a->infinity); + + secp256k1_gej_double_var(&d, a, NULL); + + /* + * Perform the additions on an isomorphism where 'd' is affine: drop the z coordinate + * of 'd', and scale the 1P starting value's x/y coordinates without changing its z. + */ + d_ge.x = d.x; + d_ge.y = d.y; + d_ge.infinity = 0; + + secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z); + prej[0].x = a_ge.x; + prej[0].y = a_ge.y; + prej[0].z = a->z; + prej[0].infinity = 0; + + zr[0] = d.z; + for (i = 1; i < n; i++) { + secp256k1_gej_add_ge_var(&prej[i], &prej[i-1], &d_ge, &zr[i]); + } + + /* + * Each point in 'prej' has a z coordinate too small by a factor of 'd.z'. Only + * the final point's z coordinate is actually used though, so just update that. + */ + secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z); +} + +/** Fill a table 'pre' with precomputed odd multiples of a. + * + * There are two versions of this function: + * - secp256k1_ecmult_odd_multiples_table_globalz_windowa which brings its + * resulting point set to a single constant Z denominator, stores the X and Y + * coordinates as ge_storage points in pre, and stores the global Z in rz. + * It only operates on tables sized for WINDOW_A wnaf multiples. + * - secp256k1_ecmult_odd_multiples_table_storage_var, which converts its + * resulting point set to actually affine points, and stores those in pre. + * It operates on tables of any size, but uses heap-allocated temporaries. + * + * To compute a*P + b*G, we compute a table for P using the first function, + * and for G using the second (which requires an inverse, but it only needs to + * happen once). + */ +static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge_t *pre, secp256k1_fe_t *globalz, const secp256k1_gej_t *a) { + secp256k1_gej_t prej[ECMULT_TABLE_SIZE(WINDOW_A)]; + secp256k1_fe_t zr[ECMULT_TABLE_SIZE(WINDOW_A)]; + + /* Compute the odd multiples in Jacobian form. */ + secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), prej, zr, a); + /* Bring them to the same Z denominator. */ + secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr); +} + +static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage_t *pre, const secp256k1_gej_t *a, const callback_t *cb) { + secp256k1_gej_t *prej = (secp256k1_gej_t*)checked_malloc(cb, sizeof(secp256k1_gej_t) * n); + secp256k1_ge_t *prea = (secp256k1_ge_t*)checked_malloc(cb, sizeof(secp256k1_ge_t) * n); + secp256k1_fe_t *zr = (secp256k1_fe_t*)checked_malloc(cb, sizeof(secp256k1_fe_t) * n); + int i; + + /* Compute the odd multiples in Jacobian form. */ + secp256k1_ecmult_odd_multiples_table(n, prej, zr, a); + /* Convert them in batch to affine coordinates. */ + secp256k1_ge_set_table_gej_var(n, prea, prej, zr); + /* Convert them to compact storage form. */ + for (i = 0; i < n; i++) { + secp256k1_ge_to_storage(&pre[i], &prea[i]); + } + + free(prea); + free(prej); + free(zr); +} + +/** The following two macro retrieves a particular odd multiple from a table + * of precomputed multiples. 
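+ * For example, n = 5 selects pre[(5-1)/2] = pre[2], which holds 5*a, while n = -5
+ * yields the negation of that same entry; only odd n is valid, since only odd
+ * multiples are stored (the VERIFY_CHECKs below assert this).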
*/ +#define ECMULT_TABLE_GET_GE(r,pre,n,w) do { \ + VERIFY_CHECK(((n) & 1) == 1); \ + VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \ + VERIFY_CHECK((n) <= ((1 << ((w)-1)) - 1)); \ + if ((n) > 0) { \ + *(r) = (pre)[((n)-1)/2]; \ + } else { \ + secp256k1_ge_neg((r), &(pre)[(-(n)-1)/2]); \ + } \ +} while(0) + +#define ECMULT_TABLE_GET_GE_STORAGE(r,pre,n,w) do { \ + VERIFY_CHECK(((n) & 1) == 1); \ + VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \ + VERIFY_CHECK((n) <= ((1 << ((w)-1)) - 1)); \ + if ((n) > 0) { \ + secp256k1_ge_from_storage((r), &(pre)[((n)-1)/2]); \ + } else { \ + secp256k1_ge_from_storage((r), &(pre)[(-(n)-1)/2]); \ + secp256k1_ge_neg((r), (r)); \ + } \ +} while(0) + +static void secp256k1_ecmult_context_init(secp256k1_ecmult_context_t *ctx) { + ctx->pre_g = NULL; +#ifdef USE_ENDOMORPHISM + ctx->pre_g_128 = NULL; +#endif +} + +static void secp256k1_ecmult_context_build(secp256k1_ecmult_context_t *ctx, const callback_t *cb) { + secp256k1_gej_t gj; + + if (ctx->pre_g != NULL) { + return; + } + + /* get the generator */ + secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g); + + ctx->pre_g = (secp256k1_ge_storage_t (*)[])checked_malloc(cb, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G)); + + /* precompute the tables with odd multiples */ + secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj, cb); + +#ifdef USE_ENDOMORPHISM + { + secp256k1_gej_t g_128j; + int i; + + ctx->pre_g_128 = (secp256k1_ge_storage_t (*)[])checked_malloc(cb, sizeof((*ctx->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G)); + + /* calculate 2^128*generator */ + g_128j = gj; + for (i = 0; i < 128; i++) { + secp256k1_gej_double_var(&g_128j, &g_128j, NULL); + } + secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j, cb); + } +#endif +} + +static void secp256k1_ecmult_context_clone(secp256k1_ecmult_context_t *dst, + const secp256k1_ecmult_context_t *src, const callback_t *cb) { + if (src->pre_g == NULL) { + dst->pre_g = NULL; + } else { + size_t size = sizeof((*dst->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G); + dst->pre_g = (secp256k1_ge_storage_t (*)[])checked_malloc(cb, size); + memcpy(dst->pre_g, src->pre_g, size); + } +#ifdef USE_ENDOMORPHISM + if (src->pre_g_128 == NULL) { + dst->pre_g_128 = NULL; + } else { + size_t size = sizeof((*dst->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G); + dst->pre_g_128 = (secp256k1_ge_storage_t (*)[])checked_malloc(cb, size); + memcpy(dst->pre_g_128, src->pre_g_128, size); + } +#endif +} + +static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context_t *ctx) { + return ctx->pre_g != NULL; +} + +static void secp256k1_ecmult_context_clear(secp256k1_ecmult_context_t *ctx) { + free(ctx->pre_g); +#ifdef USE_ENDOMORPHISM + free(ctx->pre_g_128); +#endif + secp256k1_ecmult_context_init(ctx); +} + +/** Convert a number to WNAF notation. The number becomes represented by sum(2^i * wnaf[i], i=0..bits), + * with the following guarantees: + * - each wnaf[i] is either 0, or an odd integer between -(1<<(w-1) - 1) and (1<<(w-1) - 1) + * - two non-zero entries in wnaf are separated by at least w-1 zeroes. + * - the number of set values in wnaf is returned. This number is at most 256, and at most one more + * than the number of bits in the (absolute value) of the input. 
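+ * For example, with w = 4 the scalar 13 is encoded as wnaf = {-3, 0, 0, 0, 1}:
+ * -3*2^0 + 1*2^4 = 13, both digits are odd and bounded by 2^(w-1)-1 = 7, and the
+ * two non-zero digits are separated by w-1 = 3 zeroes.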
+ */ +static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar_t *a, int w) { + secp256k1_scalar_t s = *a; + int last_set_bit = -1; + int bit = 0; + int sign = 1; + int carry = 0; + + VERIFY_CHECK(wnaf != NULL); + VERIFY_CHECK(0 <= len && len <= 256); + VERIFY_CHECK(a != NULL); + VERIFY_CHECK(2 <= w && w <= 31); + + memset(wnaf, 0, len * sizeof(wnaf[0])); + + if (secp256k1_scalar_get_bits(&s, 255, 1)) { + secp256k1_scalar_negate(&s, &s); + sign = -1; + } + + while (bit < len) { + int now; + int word; + if (secp256k1_scalar_get_bits(&s, bit, 1) == (unsigned int)carry) { + bit++; + continue; + } + + now = w; + if (now > len - bit) { + now = len - bit; + } + + word = secp256k1_scalar_get_bits_var(&s, bit, now) + carry; + + carry = (word >> (w-1)) & 1; + word -= carry << w; + + wnaf[bit] = sign * word; + last_set_bit = bit; + + bit += now; + } +#ifdef VERIFY + CHECK(carry == 0); + while (bit < 256) { + CHECK(secp256k1_scalar_get_bits(&s, bit++, 1) == 0); + } +#endif + return last_set_bit + 1; +} + +static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_scalar_t *na, const secp256k1_scalar_t *ng) { + secp256k1_ge_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)]; + secp256k1_ge_t tmpa; + secp256k1_fe_t Z; +#ifdef USE_ENDOMORPHISM + secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)]; + secp256k1_scalar_t na_1, na_lam; + /* Splitted G factors. */ + secp256k1_scalar_t ng_1, ng_128; + int wnaf_na_1[130]; + int wnaf_na_lam[130]; + int bits_na_1; + int bits_na_lam; + int wnaf_ng_1[129]; + int bits_ng_1; + int wnaf_ng_128[129]; + int bits_ng_128; +#else + int wnaf_na[256]; + int bits_na; + int wnaf_ng[256]; + int bits_ng; +#endif + int i; + int bits; + +#ifdef USE_ENDOMORPHISM + /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */ + secp256k1_scalar_split_lambda(&na_1, &na_lam, na); + + /* build wnaf representation for na_1 and na_lam. */ + bits_na_1 = secp256k1_ecmult_wnaf(wnaf_na_1, 130, &na_1, WINDOW_A); + bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, 130, &na_lam, WINDOW_A); + VERIFY_CHECK(bits_na_1 <= 130); + VERIFY_CHECK(bits_na_lam <= 130); + bits = bits_na_1; + if (bits_na_lam > bits) { + bits = bits_na_lam; + } +#else + /* build wnaf representation for na. */ + bits_na = secp256k1_ecmult_wnaf(wnaf_na, 256, na, WINDOW_A); + bits = bits_na; +#endif + + /* Calculate odd multiples of a. + * All multiples are brought to the same Z 'denominator', which is stored + * in Z. Due to secp256k1' isomorphism we can do all operations pretending + * that the Z coordinate was 1, use affine addition formulae, and correct + * the Z coordinate of the result once at the end. + * The exception is the precomputed G table points, which are actually + * affine. Compared to the base used for other points, they have a Z ratio + * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same + * isomorphism to efficiently add with a known Z inverse. 
+ */ + secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, a); + +#ifdef USE_ENDOMORPHISM + for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) { + secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]); + } + + /* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */ + secp256k1_scalar_split_128(&ng_1, &ng_128, ng); + + /* Build wnaf representation for ng_1 and ng_128 */ + bits_ng_1 = secp256k1_ecmult_wnaf(wnaf_ng_1, 129, &ng_1, WINDOW_G); + bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G); + if (bits_ng_1 > bits) { + bits = bits_ng_1; + } + if (bits_ng_128 > bits) { + bits = bits_ng_128; + } +#else + bits_ng = secp256k1_ecmult_wnaf(wnaf_ng, 256, ng, WINDOW_G); + if (bits_ng > bits) { + bits = bits_ng; + } +#endif + + secp256k1_gej_set_infinity(r); + + for (i = bits - 1; i >= 0; i--) { + int n; + secp256k1_gej_double_var(r, r, NULL); +#ifdef USE_ENDOMORPHISM + if (i < bits_na_1 && (n = wnaf_na_1[i])) { + ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); + secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); + } + if (i < bits_na_lam && (n = wnaf_na_lam[i])) { + ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A); + secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); + } + if (i < bits_ng_1 && (n = wnaf_ng_1[i])) { + ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G); + secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); + } + if (i < bits_ng_128 && (n = wnaf_ng_128[i])) { + ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g_128, n, WINDOW_G); + secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); + } +#else + if (i < bits_na && (n = wnaf_na[i])) { + ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); + secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); + } + if (i < bits_ng && (n = wnaf_ng[i])) { + ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G); + secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); + } +#endif + } + + if (!r->infinity) { + secp256k1_fe_mul(&r->z, &r->z, &Z); + } +} + +#endif diff --git a/bf/secp256k1/src/field.h b/bf/secp256k1/src/field.h new file mode 100644 index 0000000..2b2e0eb --- /dev/null +++ b/bf/secp256k1/src/field.h @@ -0,0 +1,119 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_FIELD_ +#define _SECP256K1_FIELD_ + +/** Field element module. + * + * Field elements can be represented in several ways, but code accessing + * it (and implementations) need to take certain properaties into account: + * - Each field element can be normalized or not. + * - Each field element has a magnitude, which represents how far away + * its representation is away from normalization. Normalized elements + * always have a magnitude of 1, but a magnitude of 1 doesn't imply + * normality. + */ + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +#if defined(USE_FIELD_10X26) +#include "field_10x26.h" +#elif defined(USE_FIELD_5X52) +#include "field_5x52.h" +#else +#error "Please select field implementation" +#endif + +/** Normalize a field element. */ +static void secp256k1_fe_normalize(secp256k1_fe_t *r); + +/** Weakly normalize a field element: reduce it magnitude to 1, but don't fully normalize. 
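+ * For example, secp256k1_fe_add yields a result whose magnitude is the sum of its
+ * inputs' magnitudes; a weak normalization brings that back down to 1 so that the
+ * magnitude limits of later operations (at most 8 for mul/sqr) keep holding.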
*/ +static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r); + +/** Normalize a field element, without constant-time guarantee. */ +static void secp256k1_fe_normalize_var(secp256k1_fe_t *r); + +/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field + * implementation may optionally normalize the input, but this should not be relied upon. */ +static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r); + +/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field + * implementation may optionally normalize the input, but this should not be relied upon. */ +static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r); + +/** Set a field element equal to a small integer. Resulting field element is normalized. */ +static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a); + +/** Verify whether a field element is zero. Requires the input to be normalized. */ +static int secp256k1_fe_is_zero(const secp256k1_fe_t *a); + +/** Check the "oddness" of a field element. Requires the input to be normalized. */ +static int secp256k1_fe_is_odd(const secp256k1_fe_t *a); + +/** Compare two field elements. Requires magnitude-1 inputs. */ +static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b); + +/** Compare two field elements. Requires both inputs to be normalized */ +static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b); + +/** Set a field element equal to 32-byte big endian value. If successful, the resulting field element is normalized. */ +static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a); + +/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */ +static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a); + +/** Set a field element equal to the additive inverse of another. Takes a maximum magnitude of the input + * as an argument. The magnitude of the output is one higher. */ +static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m); + +/** Multiplies the passed field element with a small integer constant. Multiplies the magnitude by that + * small integer. */ +static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a); + +/** Adds a field element to another. The result has the sum of the inputs' magnitudes as magnitude. */ +static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a); + +/** Sets a field element to be the product of two others. Requires the inputs' magnitudes to be at most 8. + * The output magnitude is 1 (but not guaranteed to be normalized). */ +static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b); + +/** Sets a field element to be the square of another. Requires the input's magnitude to be at most 8. + * The output magnitude is 1 (but not guaranteed to be normalized). */ +static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a); + +/** Sets a field element to be the (modular) square root (if any exist) of another. Requires the + * input's magnitude to be at most 8. The output magnitude is 1 (but not guaranteed to be + * normalized). Return value indicates whether a square root was found. */ +static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a); + +/** Sets a field element to be the (modular) inverse of another. Requires the input's magnitude to be + * at most 8. 
The output magnitude is 1 (but not guaranteed to be normalized). */ +static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a); + +/** Potentially faster version of secp256k1_fe_inv, without constant-time guarantee. */ +static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a); + +/** Calculate the (modular) inverses of a batch of field elements. Requires the inputs' magnitudes to be + * at most 8. The output magnitudes are 1 (but not guaranteed to be normalized). The inputs and + * outputs must not overlap in memory. */ +static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t *r, const secp256k1_fe_t *a); + +/** Convert a field element to the storage type. */ +static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t*); + +/** Convert a field element back from the storage type. */ +static void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t*); + +/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */ +static void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag); + +/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */ +static void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag); + +#endif diff --git a/bf/secp256k1/src/field_10x26.h b/bf/secp256k1/src/field_10x26.h new file mode 100644 index 0000000..cc4c74a --- /dev/null +++ b/bf/secp256k1/src/field_10x26.h @@ -0,0 +1,47 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_FIELD_REPR_ +#define _SECP256K1_FIELD_REPR_ + +#include + +typedef struct { + /* X = sum(i=0..9, elem[i]*2^26) mod n */ + uint32_t n[10]; +#ifdef VERIFY + int magnitude; + int normalized; +#endif +} secp256k1_fe_t; + +/* Unpacks a constant into a overlapping multi-limbed FE element. 
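+ * (d7..d0 are the 32-bit words of the constant, d7 most significant; each word is
+ * split across neighbouring 26-bit limbs, e.g. limb 1 takes the top 6 bits of d0
+ * and the low 20 bits of d1.)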
*/ +#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \ + (d0) & 0x3FFFFFFUL, \ + (((uint32_t)d0) >> 26) | ((uint32_t)(d1) & 0xFFFFFUL) << 6, \ + (((uint32_t)d1) >> 20) | ((uint32_t)(d2) & 0x3FFFUL) << 12, \ + (((uint32_t)d2) >> 14) | ((uint32_t)(d3) & 0xFFUL) << 18, \ + (((uint32_t)d3) >> 8) | ((uint32_t)(d4) & 0x3UL) << 24, \ + (((uint32_t)d4) >> 2) & 0x3FFFFFFUL, \ + (((uint32_t)d4) >> 28) | ((uint32_t)(d5) & 0x3FFFFFUL) << 4, \ + (((uint32_t)d5) >> 22) | ((uint32_t)(d6) & 0xFFFFUL) << 10, \ + (((uint32_t)d6) >> 16) | ((uint32_t)(d7) & 0x3FFUL) << 16, \ + (((uint32_t)d7) >> 10) \ +} + +#ifdef VERIFY +#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0)), 1, 1} +#else +#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0))} +#endif + +typedef struct { + uint32_t n[8]; +} secp256k1_fe_storage_t; + +#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }} +#define SECP256K1_FE_STORAGE_CONST_GET(d) d.n[7], d.n[6], d.n[5], d.n[4],d.n[3], d.n[2], d.n[1], d.n[0] +#endif diff --git a/bf/secp256k1/src/field_10x26_impl.h b/bf/secp256k1/src/field_10x26_impl.h new file mode 100644 index 0000000..0fc0b86 --- /dev/null +++ b/bf/secp256k1/src/field_10x26_impl.h @@ -0,0 +1,1138 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_FIELD_REPR_IMPL_H_ +#define _SECP256K1_FIELD_REPR_IMPL_H_ + +#include +#include +#include "util.h" +#include "num.h" +#include "field.h" + +#ifdef VERIFY +static void secp256k1_fe_verify(const secp256k1_fe_t *a) { + const uint32_t *d = a->n; + int m = a->normalized ? 1 : 2 * a->magnitude, r = 1; + r &= (d[0] <= 0x3FFFFFFUL * m); + r &= (d[1] <= 0x3FFFFFFUL * m); + r &= (d[2] <= 0x3FFFFFFUL * m); + r &= (d[3] <= 0x3FFFFFFUL * m); + r &= (d[4] <= 0x3FFFFFFUL * m); + r &= (d[5] <= 0x3FFFFFFUL * m); + r &= (d[6] <= 0x3FFFFFFUL * m); + r &= (d[7] <= 0x3FFFFFFUL * m); + r &= (d[8] <= 0x3FFFFFFUL * m); + r &= (d[9] <= 0x03FFFFFUL * m); + r &= (a->magnitude >= 0); + r &= (a->magnitude <= 32); + if (a->normalized) { + r &= (a->magnitude <= 1); + if (r && (d[9] == 0x03FFFFFUL)) { + uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2]; + if (mid == 0x3FFFFFFUL) { + r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL); + } + } + } + VERIFY_CHECK(r == 1); +} +#else +static void secp256k1_fe_verify(const secp256k1_fe_t *a) { + (void)a; +} +#endif + +static void secp256k1_fe_normalize(secp256k1_fe_t *r) { + uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], + t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; + + /* Reduce t9 at the start so there will be at most a single carry from the first pass */ + uint32_t m; + uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; + + /* The first pass ensures the magnitude is 1, ... 
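+ * (Folding the carry x back in uses 2^256 = 0x1000003D1 mod p: the x * 0x3D1 term
+ * below lands in t0, and x << 6 is the remaining x * 2^32 expressed at t1's
+ * weight of 2^26.)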
*/ + t0 += x * 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; + + /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t9 >> 23 == 0); + + /* At most a single final reduction is needed; check if the value is >= the field characteristic */ + x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) + & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); + + /* Apply the final reduction (for constant-time behaviour, we do it always) */ + t0 += x * 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; + + /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ + VERIFY_CHECK(t9 >> 22 == x); + + /* Mask off the possible multiple of 2^256 from the final reduction */ + t9 &= 0x03FFFFFUL; + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; + +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif +} + +static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) { + uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], + t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; + + /* Reduce t9 at the start so there will be at most a single carry from the first pass */ + uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; + + /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t9 >> 23 == 0); + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; + +#ifdef VERIFY + r->magnitude = 1; + secp256k1_fe_verify(r); +#endif +} + +static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) { + uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], + t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; + + /* Reduce t9 at the start so there will be at most a single carry from the first pass */ + uint32_t m; + uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; + + /* The first pass ensures the magnitude is 1, ... 
*/ + t0 += x * 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; + + /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t9 >> 23 == 0); + + /* At most a single final reduction is needed; check if the value is >= the field characteristic */ + x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) + & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); + + if (x) { + t0 += 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; + + /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ + VERIFY_CHECK(t9 >> 22 == x); + + /* Mask off the possible multiple of 2^256 from the final reduction */ + t9 &= 0x03FFFFFUL; + } + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; + +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif +} + +static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) { + uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], + t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; + + /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ + uint32_t z0, z1; + + /* Reduce t9 at the start so there will be at most a single carry from the first pass */ + uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; + z0 |= t9; z1 &= t9 ^ 0x3C00000UL; + + /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t9 >> 23 == 0); + + return (z0 == 0) | (z1 == 0x3FFFFFFUL); +} + +static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) { + uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; + uint32_t z0, z1; + uint32_t x; + + t0 = r->n[0]; + t9 = r->n[9]; + + /* Reduce t9 at the start so there will be at most a single carry from the first pass */ + x = t9 >> 22; + + /* The first pass ensures the magnitude is 1, ... 
*/ + t0 += x * 0x3D1UL; + + /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ + z0 = t0 & 0x3FFFFFFUL; + z1 = z0 ^ 0x3D0UL; + + /* Fast return path should catch the majority of cases */ + if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) { + return 0; + } + + t1 = r->n[1]; + t2 = r->n[2]; + t3 = r->n[3]; + t4 = r->n[4]; + t5 = r->n[5]; + t6 = r->n[6]; + t7 = r->n[7]; + t8 = r->n[8]; + + t9 &= 0x03FFFFFUL; + t1 += (x << 6); + + t1 += (t0 >> 26); t0 = z0; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; + z0 |= t9; z1 &= t9 ^ 0x3C00000UL; + + /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t9 >> 23 == 0); + + return (z0 == 0) | (z1 == 0x3FFFFFFUL); +} + +SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { + r->n[0] = a; + r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif +} + +SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) { + const uint32_t *t = a->n; +#ifdef VERIFY + VERIFY_CHECK(a->normalized); + secp256k1_fe_verify(a); +#endif + return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0; +} + +SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) { +#ifdef VERIFY + VERIFY_CHECK(a->normalized); + secp256k1_fe_verify(a); +#endif + return a->n[0] & 1; +} + +SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) { + int i; +#ifdef VERIFY + a->magnitude = 0; + a->normalized = 1; +#endif + for (i=0; i<10; i++) { + a->n[i] = 0; + } +} + +static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { + int i; +#ifdef VERIFY + VERIFY_CHECK(a->normalized); + VERIFY_CHECK(b->normalized); + secp256k1_fe_verify(a); + secp256k1_fe_verify(b); +#endif + for (i = 9; i >= 0; i--) { + if (a->n[i] > b->n[i]) { + return 1; + } + if (a->n[i] < b->n[i]) { + return -1; + } + } + return 0; +} + +static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) { + int i; + r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; + r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; + for (i=0; i<32; i++) { + int j; + for (j=0; j<4; j++) { + int limb = (8*i+2*j)/26; + int shift = (8*i+2*j)%26; + r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift; + } + } + if (r->n[9] == 0x3FFFFFUL && (r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL && (r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL) { + return 0; + } +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif + return 1; +} + +/** Convert a field element to a 32-byte big endian value. 
Requires the input to be normalized */ +static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) { + int i; +#ifdef VERIFY + VERIFY_CHECK(a->normalized); + secp256k1_fe_verify(a); +#endif + for (i=0; i<32; i++) { + int j; + int c = 0; + for (j=0; j<4; j++) { + int limb = (8*i+2*j)/26; + int shift = (8*i+2*j)%26; + c |= ((a->n[limb] >> shift) & 0x3) << (2 * j); + } + r[31-i] = c; + } +} + +SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) { +#ifdef VERIFY + VERIFY_CHECK(a->magnitude <= m); + secp256k1_fe_verify(a); +#endif + r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0]; + r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1]; + r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2]; + r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3]; + r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4]; + r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5]; + r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6]; + r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7]; + r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8]; + r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9]; +#ifdef VERIFY + r->magnitude = m + 1; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) { + r->n[0] *= a; + r->n[1] *= a; + r->n[2] *= a; + r->n[3] *= a; + r->n[4] *= a; + r->n[5] *= a; + r->n[6] *= a; + r->n[7] *= a; + r->n[8] *= a; + r->n[9] *= a; +#ifdef VERIFY + r->magnitude *= a; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) { +#ifdef VERIFY + secp256k1_fe_verify(a); +#endif + r->n[0] += a->n[0]; + r->n[1] += a->n[1]; + r->n[2] += a->n[2]; + r->n[3] += a->n[3]; + r->n[4] += a->n[4]; + r->n[5] += a->n[5]; + r->n[6] += a->n[6]; + r->n[7] += a->n[7]; + r->n[8] += a->n[8]; + r->n[9] += a->n[9]; +#ifdef VERIFY + r->magnitude += a->magnitude; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +#ifdef VERIFY +#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) +#else +#define VERIFY_BITS(x, n) do { } while(0) +#endif + +SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) { + uint64_t c, d; + uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; + uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7; + const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; + + VERIFY_BITS(a[0], 30); + VERIFY_BITS(a[1], 30); + VERIFY_BITS(a[2], 30); + VERIFY_BITS(a[3], 30); + VERIFY_BITS(a[4], 30); + VERIFY_BITS(a[5], 30); + VERIFY_BITS(a[6], 30); + VERIFY_BITS(a[7], 30); + VERIFY_BITS(a[8], 30); + VERIFY_BITS(a[9], 26); + VERIFY_BITS(b[0], 30); + VERIFY_BITS(b[1], 30); + VERIFY_BITS(b[2], 30); + VERIFY_BITS(b[3], 30); + VERIFY_BITS(b[4], 30); + VERIFY_BITS(b[5], 30); + VERIFY_BITS(b[6], 30); + VERIFY_BITS(b[7], 30); + VERIFY_BITS(b[8], 30); + VERIFY_BITS(b[9], 26); + + /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. + * px is a shorthand for sum(a[i]*b[x-i], i=0..x). + * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. 
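+ * This holds because position 10 carries weight 2^(26*10) = 2^260, and
+ * 2^260 = 16 * 2^256 = 16 * 0x1000003D1 = 0x1000003D10 mod p, which is exactly
+ * (R1 << 26) + R0.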
+ */ + + d = (uint64_t)a[0] * b[9] + + (uint64_t)a[1] * b[8] + + (uint64_t)a[2] * b[7] + + (uint64_t)a[3] * b[6] + + (uint64_t)a[4] * b[5] + + (uint64_t)a[5] * b[4] + + (uint64_t)a[6] * b[3] + + (uint64_t)a[7] * b[2] + + (uint64_t)a[8] * b[1] + + (uint64_t)a[9] * b[0]; + /* VERIFY_BITS(d, 64); */ + /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ + t9 = d & M; d >>= 26; + VERIFY_BITS(t9, 26); + VERIFY_BITS(d, 38); + /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ + + c = (uint64_t)a[0] * b[0]; + VERIFY_BITS(c, 60); + /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ + d += (uint64_t)a[1] * b[9] + + (uint64_t)a[2] * b[8] + + (uint64_t)a[3] * b[7] + + (uint64_t)a[4] * b[6] + + (uint64_t)a[5] * b[5] + + (uint64_t)a[6] * b[4] + + (uint64_t)a[7] * b[3] + + (uint64_t)a[8] * b[2] + + (uint64_t)a[9] * b[1]; + VERIFY_BITS(d, 63); + /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ + u0 = d & M; d >>= 26; c += u0 * R0; + VERIFY_BITS(u0, 26); + VERIFY_BITS(d, 37); + VERIFY_BITS(c, 61); + /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ + t0 = c & M; c >>= 26; c += u0 * R1; + VERIFY_BITS(t0, 26); + VERIFY_BITS(c, 37); + /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ + /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ + + c += (uint64_t)a[0] * b[1] + + (uint64_t)a[1] * b[0]; + VERIFY_BITS(c, 62); + /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ + d += (uint64_t)a[2] * b[9] + + (uint64_t)a[3] * b[8] + + (uint64_t)a[4] * b[7] + + (uint64_t)a[5] * b[6] + + (uint64_t)a[6] * b[5] + + (uint64_t)a[7] * b[4] + + (uint64_t)a[8] * b[3] + + (uint64_t)a[9] * b[2]; + VERIFY_BITS(d, 63); + /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ + u1 = d & M; d >>= 26; c += u1 * R0; + VERIFY_BITS(u1, 26); + VERIFY_BITS(d, 37); + VERIFY_BITS(c, 63); + /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ + t1 = c & M; c >>= 26; c += u1 * R1; + VERIFY_BITS(t1, 26); + VERIFY_BITS(c, 38); + /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ + /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ + + c += (uint64_t)a[0] * b[2] + + (uint64_t)a[1] * b[1] + + (uint64_t)a[2] * b[0]; + VERIFY_BITS(c, 62); + /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + d += (uint64_t)a[3] * b[9] + + (uint64_t)a[4] * b[8] + + (uint64_t)a[5] * b[7] + + (uint64_t)a[6] * b[6] + + (uint64_t)a[7] * b[5] + + (uint64_t)a[8] * b[4] + + (uint64_t)a[9] * b[3]; + VERIFY_BITS(d, 63); + /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + u2 = d & M; d >>= 26; c += u2 * R0; + VERIFY_BITS(u2, 26); + VERIFY_BITS(d, 37); + VERIFY_BITS(c, 63); + /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + t2 = c & M; c >>= 26; c += u2 * R1; + VERIFY_BITS(t2, 26); + VERIFY_BITS(c, 38); + /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + + c += (uint64_t)a[0] * b[3] + + (uint64_t)a[1] * b[2] + + (uint64_t)a[2] * b[1] + + (uint64_t)a[3] * b[0]; + VERIFY_BITS(c, 63); + /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ + d += (uint64_t)a[4] * b[9] + + (uint64_t)a[5] * b[8] + + (uint64_t)a[6] * b[7] + + (uint64_t)a[7] * b[6] + + (uint64_t)a[8] * b[5] + + (uint64_t)a[9] * b[4]; + VERIFY_BITS(d, 63); + /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 
t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ + u3 = d & M; d >>= 26; c += u3 * R0; + VERIFY_BITS(u3, 26); + VERIFY_BITS(d, 37); + /* VERIFY_BITS(c, 64); */ + /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ + t3 = c & M; c >>= 26; c += u3 * R1; + VERIFY_BITS(t3, 26); + VERIFY_BITS(c, 39); + /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ + /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ + + c += (uint64_t)a[0] * b[4] + + (uint64_t)a[1] * b[3] + + (uint64_t)a[2] * b[2] + + (uint64_t)a[3] * b[1] + + (uint64_t)a[4] * b[0]; + VERIFY_BITS(c, 63); + /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + d += (uint64_t)a[5] * b[9] + + (uint64_t)a[6] * b[8] + + (uint64_t)a[7] * b[7] + + (uint64_t)a[8] * b[6] + + (uint64_t)a[9] * b[5]; + VERIFY_BITS(d, 62); + /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + u4 = d & M; d >>= 26; c += u4 * R0; + VERIFY_BITS(u4, 26); + VERIFY_BITS(d, 36); + /* VERIFY_BITS(c, 64); */ + /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + t4 = c & M; c >>= 26; c += u4 * R1; + VERIFY_BITS(t4, 26); + VERIFY_BITS(c, 39); + /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + + c += (uint64_t)a[0] * b[5] + + (uint64_t)a[1] * b[4] + + (uint64_t)a[2] * b[3] + + (uint64_t)a[3] * b[2] + + (uint64_t)a[4] * b[1] + + (uint64_t)a[5] * b[0]; + VERIFY_BITS(c, 63); + /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + d += (uint64_t)a[6] * b[9] + + (uint64_t)a[7] * b[8] + + (uint64_t)a[8] * b[7] + + (uint64_t)a[9] * b[6]; + VERIFY_BITS(d, 62); + /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + u5 = d & M; d >>= 26; c += u5 * R0; + VERIFY_BITS(u5, 26); + VERIFY_BITS(d, 36); + /* VERIFY_BITS(c, 64); */ + /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + t5 = c & M; c >>= 26; c += u5 * R1; + VERIFY_BITS(t5, 26); + VERIFY_BITS(c, 39); + /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + + c += (uint64_t)a[0] * b[6] + + (uint64_t)a[1] * b[5] + + (uint64_t)a[2] * b[4] + + (uint64_t)a[3] * b[3] + + (uint64_t)a[4] * b[2] + + (uint64_t)a[5] * b[1] + + (uint64_t)a[6] * b[0]; + VERIFY_BITS(c, 63); + /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ + d += (uint64_t)a[7] * b[9] + + (uint64_t)a[8] * b[8] + + (uint64_t)a[9] * b[7]; + VERIFY_BITS(d, 61); + /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ + u6 = d & M; d >>= 26; c += u6 * R0; + VERIFY_BITS(u6, 26); + VERIFY_BITS(d, 35); + /* VERIFY_BITS(c, 64); */ + /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ + t6 = c & M; c >>= 26; c += u6 * R1; + VERIFY_BITS(t6, 26); + VERIFY_BITS(c, 39); + /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 
p0] */ + /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ + + c += (uint64_t)a[0] * b[7] + + (uint64_t)a[1] * b[6] + + (uint64_t)a[2] * b[5] + + (uint64_t)a[3] * b[4] + + (uint64_t)a[4] * b[3] + + (uint64_t)a[5] * b[2] + + (uint64_t)a[6] * b[1] + + (uint64_t)a[7] * b[0]; + /* VERIFY_BITS(c, 64); */ + VERIFY_CHECK(c <= 0x8000007C00000007ULL); + /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + d += (uint64_t)a[8] * b[9] + + (uint64_t)a[9] * b[8]; + VERIFY_BITS(d, 58); + /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + u7 = d & M; d >>= 26; c += u7 * R0; + VERIFY_BITS(u7, 26); + VERIFY_BITS(d, 32); + /* VERIFY_BITS(c, 64); */ + VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); + /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + t7 = c & M; c >>= 26; c += u7 * R1; + VERIFY_BITS(t7, 26); + VERIFY_BITS(c, 38); + /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + + c += (uint64_t)a[0] * b[8] + + (uint64_t)a[1] * b[7] + + (uint64_t)a[2] * b[6] + + (uint64_t)a[3] * b[5] + + (uint64_t)a[4] * b[4] + + (uint64_t)a[5] * b[3] + + (uint64_t)a[6] * b[2] + + (uint64_t)a[7] * b[1] + + (uint64_t)a[8] * b[0]; + /* VERIFY_BITS(c, 64); */ + VERIFY_CHECK(c <= 0x9000007B80000008ULL); + /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + d += (uint64_t)a[9] * b[9]; + VERIFY_BITS(d, 57); + /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + u8 = d & M; d >>= 26; c += u8 * R0; + VERIFY_BITS(u8, 26); + VERIFY_BITS(d, 31); + /* VERIFY_BITS(c, 64); */ + VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + + r[3] = t3; + VERIFY_BITS(r[3], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[4] = t4; + VERIFY_BITS(r[4], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[5] = t5; + VERIFY_BITS(r[5], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[6] = t6; + VERIFY_BITS(r[6], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[7] = t7; + VERIFY_BITS(r[7], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + + r[8] = c & M; c >>= 26; c += u8 * R1; + VERIFY_BITS(r[8], 26); + VERIFY_BITS(c, 39); + /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + c += d * R0 + t9; + VERIFY_BITS(c, 45); + /* [d 0 0 0 0 0 0 0 0 0 
c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); + VERIFY_BITS(r[9], 22); + VERIFY_BITS(c, 46); + /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + + d = c * (R0 >> 4) + t0; + VERIFY_BITS(d, 56); + /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[0] = d & M; d >>= 26; + VERIFY_BITS(r[0], 26); + VERIFY_BITS(d, 30); + /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + d += c * (R1 >> 4) + t1; + VERIFY_BITS(d, 53); + VERIFY_CHECK(d <= 0x10000003FFFFBFULL); + /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[1] = d & M; d >>= 26; + VERIFY_BITS(r[1], 26); + VERIFY_BITS(d, 27); + VERIFY_CHECK(d <= 0x4000000ULL); + /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + d += t2; + VERIFY_BITS(d, 27); + /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[2] = d; + VERIFY_BITS(r[2], 27); + /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ +} + +SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) { + uint64_t c, d; + uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; + uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7; + const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; + + VERIFY_BITS(a[0], 30); + VERIFY_BITS(a[1], 30); + VERIFY_BITS(a[2], 30); + VERIFY_BITS(a[3], 30); + VERIFY_BITS(a[4], 30); + VERIFY_BITS(a[5], 30); + VERIFY_BITS(a[6], 30); + VERIFY_BITS(a[7], 30); + VERIFY_BITS(a[8], 30); + VERIFY_BITS(a[9], 26); + + /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. + * px is a shorthand for sum(a[i]*a[x-i], i=0..x). + * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. 
+ */ + + d = (uint64_t)(a[0]*2) * a[9] + + (uint64_t)(a[1]*2) * a[8] + + (uint64_t)(a[2]*2) * a[7] + + (uint64_t)(a[3]*2) * a[6] + + (uint64_t)(a[4]*2) * a[5]; + /* VERIFY_BITS(d, 64); */ + /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ + t9 = d & M; d >>= 26; + VERIFY_BITS(t9, 26); + VERIFY_BITS(d, 38); + /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ + + c = (uint64_t)a[0] * a[0]; + VERIFY_BITS(c, 60); + /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ + d += (uint64_t)(a[1]*2) * a[9] + + (uint64_t)(a[2]*2) * a[8] + + (uint64_t)(a[3]*2) * a[7] + + (uint64_t)(a[4]*2) * a[6] + + (uint64_t)a[5] * a[5]; + VERIFY_BITS(d, 63); + /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ + u0 = d & M; d >>= 26; c += u0 * R0; + VERIFY_BITS(u0, 26); + VERIFY_BITS(d, 37); + VERIFY_BITS(c, 61); + /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ + t0 = c & M; c >>= 26; c += u0 * R1; + VERIFY_BITS(t0, 26); + VERIFY_BITS(c, 37); + /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ + /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ + + c += (uint64_t)(a[0]*2) * a[1]; + VERIFY_BITS(c, 62); + /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ + d += (uint64_t)(a[2]*2) * a[9] + + (uint64_t)(a[3]*2) * a[8] + + (uint64_t)(a[4]*2) * a[7] + + (uint64_t)(a[5]*2) * a[6]; + VERIFY_BITS(d, 63); + /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ + u1 = d & M; d >>= 26; c += u1 * R0; + VERIFY_BITS(u1, 26); + VERIFY_BITS(d, 37); + VERIFY_BITS(c, 63); + /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ + t1 = c & M; c >>= 26; c += u1 * R1; + VERIFY_BITS(t1, 26); + VERIFY_BITS(c, 38); + /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ + /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ + + c += (uint64_t)(a[0]*2) * a[2] + + (uint64_t)a[1] * a[1]; + VERIFY_BITS(c, 62); + /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + d += (uint64_t)(a[3]*2) * a[9] + + (uint64_t)(a[4]*2) * a[8] + + (uint64_t)(a[5]*2) * a[7] + + (uint64_t)a[6] * a[6]; + VERIFY_BITS(d, 63); + /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + u2 = d & M; d >>= 26; c += u2 * R0; + VERIFY_BITS(u2, 26); + VERIFY_BITS(d, 37); + VERIFY_BITS(c, 63); + /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + t2 = c & M; c >>= 26; c += u2 * R1; + VERIFY_BITS(t2, 26); + VERIFY_BITS(c, 38); + /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ + + c += (uint64_t)(a[0]*2) * a[3] + + (uint64_t)(a[1]*2) * a[2]; + VERIFY_BITS(c, 63); + /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ + d += (uint64_t)(a[4]*2) * a[9] + + (uint64_t)(a[5]*2) * a[8] + + (uint64_t)(a[6]*2) * a[7]; + VERIFY_BITS(d, 63); + /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ + u3 = d & M; d >>= 26; c += u3 * R0; + VERIFY_BITS(u3, 26); + VERIFY_BITS(d, 37); + /* VERIFY_BITS(c, 64); */ + /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ + t3 = c & M; c >>= 26; c += u3 * R1; + VERIFY_BITS(t3, 26); + VERIFY_BITS(c, 39); + /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ + /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 
p9 0 0 0 0 0 p3 p2 p1 p0] */ + + c += (uint64_t)(a[0]*2) * a[4] + + (uint64_t)(a[1]*2) * a[3] + + (uint64_t)a[2] * a[2]; + VERIFY_BITS(c, 63); + /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + d += (uint64_t)(a[5]*2) * a[9] + + (uint64_t)(a[6]*2) * a[8] + + (uint64_t)a[7] * a[7]; + VERIFY_BITS(d, 62); + /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + u4 = d & M; d >>= 26; c += u4 * R0; + VERIFY_BITS(u4, 26); + VERIFY_BITS(d, 36); + /* VERIFY_BITS(c, 64); */ + /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + t4 = c & M; c >>= 26; c += u4 * R1; + VERIFY_BITS(t4, 26); + VERIFY_BITS(c, 39); + /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ + + c += (uint64_t)(a[0]*2) * a[5] + + (uint64_t)(a[1]*2) * a[4] + + (uint64_t)(a[2]*2) * a[3]; + VERIFY_BITS(c, 63); + /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + d += (uint64_t)(a[6]*2) * a[9] + + (uint64_t)(a[7]*2) * a[8]; + VERIFY_BITS(d, 62); + /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + u5 = d & M; d >>= 26; c += u5 * R0; + VERIFY_BITS(u5, 26); + VERIFY_BITS(d, 36); + /* VERIFY_BITS(c, 64); */ + /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + t5 = c & M; c >>= 26; c += u5 * R1; + VERIFY_BITS(t5, 26); + VERIFY_BITS(c, 39); + /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ + + c += (uint64_t)(a[0]*2) * a[6] + + (uint64_t)(a[1]*2) * a[5] + + (uint64_t)(a[2]*2) * a[4] + + (uint64_t)a[3] * a[3]; + VERIFY_BITS(c, 63); + /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ + d += (uint64_t)(a[7]*2) * a[9] + + (uint64_t)a[8] * a[8]; + VERIFY_BITS(d, 61); + /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ + u6 = d & M; d >>= 26; c += u6 * R0; + VERIFY_BITS(u6, 26); + VERIFY_BITS(d, 35); + /* VERIFY_BITS(c, 64); */ + /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ + t6 = c & M; c >>= 26; c += u6 * R1; + VERIFY_BITS(t6, 26); + VERIFY_BITS(c, 39); + /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ + + c += (uint64_t)(a[0]*2) * a[7] + + (uint64_t)(a[1]*2) * a[6] + + (uint64_t)(a[2]*2) * a[5] + + (uint64_t)(a[3]*2) * a[4]; + /* VERIFY_BITS(c, 64); */ + VERIFY_CHECK(c <= 0x8000007C00000007ULL); + /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + d += (uint64_t)(a[8]*2) * a[9]; + VERIFY_BITS(d, 58); + /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + u7 = d & M; d >>= 26; c += u7 * R0; + VERIFY_BITS(u7, 26); + VERIFY_BITS(d, 32); + /* VERIFY_BITS(c, 64); */ + VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); + /* [d u7 0 0 0 0 0 0 0 t9 
0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + t7 = c & M; c >>= 26; c += u7 * R1; + VERIFY_BITS(t7, 26); + VERIFY_BITS(c, 38); + /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ + + c += (uint64_t)(a[0]*2) * a[8] + + (uint64_t)(a[1]*2) * a[7] + + (uint64_t)(a[2]*2) * a[6] + + (uint64_t)(a[3]*2) * a[5] + + (uint64_t)a[4] * a[4]; + /* VERIFY_BITS(c, 64); */ + VERIFY_CHECK(c <= 0x9000007B80000008ULL); + /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + d += (uint64_t)a[9] * a[9]; + VERIFY_BITS(d, 57); + /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + u8 = d & M; d >>= 26; c += u8 * R0; + VERIFY_BITS(u8, 26); + VERIFY_BITS(d, 31); + /* VERIFY_BITS(c, 64); */ + VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + + r[3] = t3; + VERIFY_BITS(r[3], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[4] = t4; + VERIFY_BITS(r[4], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[5] = t5; + VERIFY_BITS(r[5], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[6] = t6; + VERIFY_BITS(r[6], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[7] = t7; + VERIFY_BITS(r[7], 26); + /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + + r[8] = c & M; c >>= 26; c += u8 * R1; + VERIFY_BITS(r[8], 26); + VERIFY_BITS(c, 39); + /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + c += d * R0 + t9; + VERIFY_BITS(c, 45); + /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); + VERIFY_BITS(r[9], 22); + VERIFY_BITS(c, 46); + /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + + d = c * (R0 >> 4) + t0; + VERIFY_BITS(d, 56); + /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[0] = d & M; d >>= 26; + VERIFY_BITS(r[0], 26); + VERIFY_BITS(d, 30); + /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 
p0] */ + d += c * (R1 >> 4) + t1; + VERIFY_BITS(d, 53); + VERIFY_CHECK(d <= 0x10000003FFFFBFULL); + /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[1] = d & M; d >>= 26; + VERIFY_BITS(r[1], 26); + VERIFY_BITS(d, 27); + VERIFY_CHECK(d <= 0x4000000ULL); + /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + d += t2; + VERIFY_BITS(d, 27); + /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[2] = d; + VERIFY_BITS(r[2], 27); + /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ +} + + +static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) { +#ifdef VERIFY + VERIFY_CHECK(a->magnitude <= 8); + VERIFY_CHECK(b->magnitude <= 8); + secp256k1_fe_verify(a); + secp256k1_fe_verify(b); + VERIFY_CHECK(r != b); +#endif + secp256k1_fe_mul_inner(r->n, a->n, b->n); +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { +#ifdef VERIFY + VERIFY_CHECK(a->magnitude <= 8); + secp256k1_fe_verify(a); +#endif + secp256k1_fe_sqr_inner(r->n, a->n); +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) { + uint32_t mask0, mask1; + mask0 = flag + ~((uint32_t)0); + mask1 = ~mask0; + r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); + r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); + r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); + r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); + r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1); + r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1); + r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1); + r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1); + r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1); + r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1); +#ifdef VERIFY + if (a->magnitude > r->magnitude) { + r->magnitude = a->magnitude; + } + r->normalized &= a->normalized; +#endif +} + +static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) { + uint32_t mask0, mask1; + mask0 = flag + ~((uint32_t)0); + mask1 = ~mask0; + r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); + r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); + r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); + r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); + r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1); + r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1); + r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1); + r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1); +} + +static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t *a) { +#ifdef VERIFY + VERIFY_CHECK(a->normalized); +#endif + r->n[0] = a->n[0] | a->n[1] << 26; + r->n[1] = a->n[1] >> 6 | a->n[2] << 20; + r->n[2] = a->n[2] >> 12 | a->n[3] << 14; + r->n[3] = a->n[3] >> 18 | a->n[4] << 8; + r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28; + r->n[5] = a->n[6] >> 4 | a->n[7] << 22; + r->n[6] = a->n[7] >> 10 | a->n[8] << 16; + r->n[7] = a->n[8] >> 16 | a->n[9] << 10; +} + +static SECP256K1_INLINE void 
secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t *a) { + r->n[0] = a->n[0] & 0x3FFFFFFUL; + r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL); + r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL); + r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL); + r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL); + r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL; + r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL); + r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL); + r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL); + r->n[9] = a->n[7] >> 10; +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; +#endif +} + +#endif diff --git a/bf/secp256k1/src/field_5x52.h b/bf/secp256k1/src/field_5x52.h new file mode 100644 index 0000000..8b24da7 --- /dev/null +++ b/bf/secp256k1/src/field_5x52.h @@ -0,0 +1,47 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_FIELD_REPR_ +#define _SECP256K1_FIELD_REPR_ + +#include + +typedef struct { + /* X = sum(i=0..4, elem[i]*2^52) mod n */ + uint64_t n[5]; +#ifdef VERIFY + int magnitude; + int normalized; +#endif +} secp256k1_fe_t; + +/* Unpacks a constant into a overlapping multi-limbed FE element. */ +#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \ + (d0) | ((uint64_t)(d1) & 0xFFFFFUL) << 32, \ + ((uint64_t)(d1) >> 20) | ((uint64_t)(d2)) << 12 | ((uint64_t)(d3) & 0xFFUL) << 44, \ + ((uint64_t)(d3) >> 8) | ((uint64_t)(d4) & 0xFFFFFFFUL) << 24, \ + ((uint64_t)(d4) >> 28) | ((uint64_t)(d5)) << 4 | ((uint64_t)(d6) & 0xFFFFUL) << 36, \ + ((uint64_t)(d6) >> 16) | ((uint64_t)(d7)) << 16 \ +} + +#ifdef VERIFY +#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0)), 1, 1} +#else +#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0))} +#endif + +typedef struct { + uint64_t n[4]; +} secp256k1_fe_storage_t; + +#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ \ + (d0) | ((uint64_t)(d1)) << 32, \ + (d2) | ((uint64_t)(d3)) << 32, \ + (d4) | ((uint64_t)(d5)) << 32, \ + (d6) | ((uint64_t)(d7)) << 32 \ +}} + +#endif diff --git a/bf/secp256k1/src/field_5x52_asm_impl.h b/bf/secp256k1/src/field_5x52_asm_impl.h new file mode 100644 index 0000000..98cc004 --- /dev/null +++ b/bf/secp256k1/src/field_5x52_asm_impl.h @@ -0,0 +1,502 @@ +/********************************************************************** + * Copyright (c) 2013-2014 Diederik Huys, Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +/** + * Changelog: + * - March 2013, Diederik Huys: original version + * - November 2014, Pieter Wuille: updated to use Peter Dettman's parallel multiplication algorithm + * - December 2014, Pieter Wuille: converted from YASM to GCC inline assembly + */ + +#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_ +#define _SECP256K1_FIELD_INNER5X52_IMPL_H_ + +SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * 
SECP256K1_RESTRICT b) { +/** + * Registers: rdx:rax = multiplication accumulator + * r9:r8 = c + * r15:rcx = d + * r10-r14 = a0-a4 + * rbx = b + * rdi = r + * rsi = a / t? + */ + uint64_t tmp1, tmp2, tmp3; +__asm__ __volatile__( + "movq 0(%%rsi),%%r10\n" + "movq 8(%%rsi),%%r11\n" + "movq 16(%%rsi),%%r12\n" + "movq 24(%%rsi),%%r13\n" + "movq 32(%%rsi),%%r14\n" + + /* d += a3 * b0 */ + "movq 0(%%rbx),%%rax\n" + "mulq %%r13\n" + "movq %%rax,%%rcx\n" + "movq %%rdx,%%r15\n" + /* d += a2 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a1 * b2 */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d = a0 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r10\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* c = a4 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r14\n" + "movq %%rax,%%r8\n" + "movq %%rdx,%%r9\n" + /* d += (c & M) * R */ + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* c >>= 52 (%%r8 only) */ + "shrdq $52,%%r9,%%r8\n" + /* t3 (tmp1) = d & M */ + "movq %%rcx,%%rsi\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rsi\n" + "movq %%rsi,%q1\n" + /* d >>= 52 */ + "shrdq $52,%%r15,%%rcx\n" + "xorq %%r15,%%r15\n" + /* d += a4 * b0 */ + "movq 0(%%rbx),%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a3 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a2 * b2 */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a1 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a0 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r10\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += c * R */ + "movq %%r8,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* t4 = d & M (%%rsi) */ + "movq %%rcx,%%rsi\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rsi\n" + /* d >>= 52 */ + "shrdq $52,%%r15,%%rcx\n" + "xorq %%r15,%%r15\n" + /* tx = t4 >> 48 (tmp3) */ + "movq %%rsi,%%rax\n" + "shrq $48,%%rax\n" + "movq %%rax,%q3\n" + /* t4 &= (M >> 4) (tmp2) */ + "movq $0xffffffffffff,%%rax\n" + "andq %%rax,%%rsi\n" + "movq %%rsi,%q2\n" + /* c = a0 * b0 */ + "movq 0(%%rbx),%%rax\n" + "mulq %%r10\n" + "movq %%rax,%%r8\n" + "movq %%rdx,%%r9\n" + /* d += a4 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a3 * b2 */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a2 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a1 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* u0 = d & M (%%rsi) */ + "movq %%rcx,%%rsi\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rsi\n" + /* d >>= 52 */ + "shrdq $52,%%r15,%%rcx\n" + "xorq %%r15,%%r15\n" + /* u0 = (u0 << 4) | tx (%%rsi) */ + "shlq $4,%%rsi\n" + "movq %q3,%%rax\n" + "orq %%rax,%%rsi\n" + /* c += u0 * (R >> 4) */ + "movq $0x1000003d1,%%rax\n" + "mulq %%rsi\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* r[0] = c & M */ + "movq %%r8,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq %%rax,0(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* c += a1 * b0 
*/ + "movq 0(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* c += a0 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r10\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d += a4 * b2 */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a3 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a2 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* c += (d & M) * R */ + "movq %%rcx,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d >>= 52 */ + "shrdq $52,%%r15,%%rcx\n" + "xorq %%r15,%%r15\n" + /* r[1] = c & M */ + "movq %%r8,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq %%rax,8(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* c += a2 * b0 */ + "movq 0(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* c += a1 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* c += a0 * b2 (last use of %%r10 = a0) */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r10\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) */ + "movq %q2,%%rsi\n" + "movq %q1,%%r10\n" + /* d += a4 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a3 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* c += (d & M) * R */ + "movq %%rcx,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d >>= 52 (%%rcx only) */ + "shrdq $52,%%r15,%%rcx\n" + /* r[2] = c & M */ + "movq %%r8,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq %%rax,16(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* c += t3 */ + "addq %%r10,%%r8\n" + /* c += d * R */ + "movq %%rcx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* r[3] = c & M */ + "movq %%r8,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq %%rax,24(%%rdi)\n" + /* c >>= 52 (%%r8 only) */ + "shrdq $52,%%r9,%%r8\n" + /* c += t4 (%%r8 only) */ + "addq %%rsi,%%r8\n" + /* r[4] = c */ + "movq %%r8,32(%%rdi)\n" +: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3) +: "b"(b), "D"(r) +: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory" +); +} + +SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) { +/** + * Registers: rdx:rax = multiplication accumulator + * r9:r8 = c + * rcx:rbx = d + * r10-r14 = a0-a4 + * r15 = M (0xfffffffffffff) + * rdi = r + * rsi = a / t? 
+ */ + uint64_t tmp1, tmp2, tmp3; +__asm__ __volatile__( + "movq 0(%%rsi),%%r10\n" + "movq 8(%%rsi),%%r11\n" + "movq 16(%%rsi),%%r12\n" + "movq 24(%%rsi),%%r13\n" + "movq 32(%%rsi),%%r14\n" + "movq $0xfffffffffffff,%%r15\n" + + /* d = (a0*2) * a3 */ + "leaq (%%r10,%%r10,1),%%rax\n" + "mulq %%r13\n" + "movq %%rax,%%rbx\n" + "movq %%rdx,%%rcx\n" + /* d += (a1*2) * a2 */ + "leaq (%%r11,%%r11,1),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* c = a4 * a4 */ + "movq %%r14,%%rax\n" + "mulq %%r14\n" + "movq %%rax,%%r8\n" + "movq %%rdx,%%r9\n" + /* d += (c & M) * R */ + "andq %%r15,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* c >>= 52 (%%r8 only) */ + "shrdq $52,%%r9,%%r8\n" + /* t3 (tmp1) = d & M */ + "movq %%rbx,%%rsi\n" + "andq %%r15,%%rsi\n" + "movq %%rsi,%q1\n" + /* d >>= 52 */ + "shrdq $52,%%rcx,%%rbx\n" + "xorq %%rcx,%%rcx\n" + /* a4 *= 2 */ + "addq %%r14,%%r14\n" + /* d += a0 * a4 */ + "movq %%r10,%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d+= (a1*2) * a3 */ + "leaq (%%r11,%%r11,1),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d += a2 * a2 */ + "movq %%r12,%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d += c * R */ + "movq %%r8,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* t4 = d & M (%%rsi) */ + "movq %%rbx,%%rsi\n" + "andq %%r15,%%rsi\n" + /* d >>= 52 */ + "shrdq $52,%%rcx,%%rbx\n" + "xorq %%rcx,%%rcx\n" + /* tx = t4 >> 48 (tmp3) */ + "movq %%rsi,%%rax\n" + "shrq $48,%%rax\n" + "movq %%rax,%q3\n" + /* t4 &= (M >> 4) (tmp2) */ + "movq $0xffffffffffff,%%rax\n" + "andq %%rax,%%rsi\n" + "movq %%rsi,%q2\n" + /* c = a0 * a0 */ + "movq %%r10,%%rax\n" + "mulq %%r10\n" + "movq %%rax,%%r8\n" + "movq %%rdx,%%r9\n" + /* d += a1 * a4 */ + "movq %%r11,%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d += (a2*2) * a3 */ + "leaq (%%r12,%%r12,1),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* u0 = d & M (%%rsi) */ + "movq %%rbx,%%rsi\n" + "andq %%r15,%%rsi\n" + /* d >>= 52 */ + "shrdq $52,%%rcx,%%rbx\n" + "xorq %%rcx,%%rcx\n" + /* u0 = (u0 << 4) | tx (%%rsi) */ + "shlq $4,%%rsi\n" + "movq %q3,%%rax\n" + "orq %%rax,%%rsi\n" + /* c += u0 * (R >> 4) */ + "movq $0x1000003d1,%%rax\n" + "mulq %%rsi\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* r[0] = c & M */ + "movq %%r8,%%rax\n" + "andq %%r15,%%rax\n" + "movq %%rax,0(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* a0 *= 2 */ + "addq %%r10,%%r10\n" + /* c += a0 * a1 */ + "movq %%r10,%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d += a2 * a4 */ + "movq %%r12,%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d += a3 * a3 */ + "movq %%r13,%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* c += (d & M) * R */ + "movq %%rbx,%%rax\n" + "andq %%r15,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d >>= 52 */ + "shrdq $52,%%rcx,%%rbx\n" + "xorq %%rcx,%%rcx\n" + /* r[1] = c & M */ + "movq %%r8,%%rax\n" + "andq %%r15,%%rax\n" + "movq %%rax,8(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* c += a0 * a2 (last use of %%r10) */ + "movq %%r10,%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* fetch t3 (%%r10, overwrites a0),t4 (%%rsi) */ + "movq 
%q2,%%rsi\n" + "movq %q1,%%r10\n" + /* c += a1 * a1 */ + "movq %%r11,%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d += a3 * a4 */ + "movq %%r13,%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* c += (d & M) * R */ + "movq %%rbx,%%rax\n" + "andq %%r15,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d >>= 52 (%%rbx only) */ + "shrdq $52,%%rcx,%%rbx\n" + /* r[2] = c & M */ + "movq %%r8,%%rax\n" + "andq %%r15,%%rax\n" + "movq %%rax,16(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* c += t3 */ + "addq %%r10,%%r8\n" + /* c += d * R */ + "movq %%rbx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* r[3] = c & M */ + "movq %%r8,%%rax\n" + "andq %%r15,%%rax\n" + "movq %%rax,24(%%rdi)\n" + /* c >>= 52 (%%r8 only) */ + "shrdq $52,%%r9,%%r8\n" + /* c += t4 (%%r8 only) */ + "addq %%rsi,%%r8\n" + /* r[4] = c */ + "movq %%r8,32(%%rdi)\n" +: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3) +: "D"(r) +: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory" +); +} + +#endif diff --git a/bf/secp256k1/src/field_5x52_impl.h b/bf/secp256k1/src/field_5x52_impl.h new file mode 100644 index 0000000..768e7c9 --- /dev/null +++ b/bf/secp256k1/src/field_5x52_impl.h @@ -0,0 +1,456 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_FIELD_REPR_IMPL_H_ +#define _SECP256K1_FIELD_REPR_IMPL_H_ + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +#include +#include "util.h" +#include "num.h" +#include "field.h" + +#if defined(USE_ASM_X86_64) +#include "field_5x52_asm_impl.h" +#else +#include "field_5x52_int128_impl.h" +#endif + +/** Implements arithmetic modulo FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F, + * represented as 5 uint64_t's in base 2^52. The values are allowed to contain >52 each. In particular, + * each FieldElem has a 'magnitude' associated with it. Internally, a magnitude M means each element + * is at most M*(2^53-1), except the most significant one, which is limited to M*(2^49-1). All operations + * accept any input with magnitude at most M, and have different rules for propagating magnitude to their + * output. + */ + +#ifdef VERIFY +static void secp256k1_fe_verify(const secp256k1_fe_t *a) { + const uint64_t *d = a->n; + int m = a->normalized ? 1 : 2 * a->magnitude, r = 1; + /* secp256k1 'p' value defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1. 
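For illustration: the checks below allow each of the four low limbs to be at most m*(2^52 - 1) and the top limb at most m*(2^48 - 1), where m is 1 for normalized values and twice the magnitude otherwise. 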
*/ + r &= (d[0] <= 0xFFFFFFFFFFFFFULL * m); + r &= (d[1] <= 0xFFFFFFFFFFFFFULL * m); + r &= (d[2] <= 0xFFFFFFFFFFFFFULL * m); + r &= (d[3] <= 0xFFFFFFFFFFFFFULL * m); + r &= (d[4] <= 0x0FFFFFFFFFFFFULL * m); + r &= (a->magnitude >= 0); + r &= (a->magnitude <= 2048); + if (a->normalized) { + r &= (a->magnitude <= 1); + if (r && (d[4] == 0x0FFFFFFFFFFFFULL) && ((d[3] & d[2] & d[1]) == 0xFFFFFFFFFFFFFULL)) { + r &= (d[0] < 0xFFFFEFFFFFC2FULL); + } + } + VERIFY_CHECK(r == 1); +} +#else +static void secp256k1_fe_verify(const secp256k1_fe_t *a) { + (void)a; +} +#endif + +static void secp256k1_fe_normalize(secp256k1_fe_t *r) { + uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; + + /* Reduce t4 at the start so there will be at most a single carry from the first pass */ + uint64_t m; + uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3; + + /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t4 >> 49 == 0); + + /* At most a single final reduction is needed; check if the value is >= the field characteristic */ + x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL) + & (t0 >= 0xFFFFEFFFFFC2FULL)); + + /* Apply the final reduction (for constant-time behaviour, we do it always) */ + t0 += x * 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; + + /* If t4 didn't carry to bit 48 already, then it should have after any final reduction */ + VERIFY_CHECK(t4 >> 48 == x); + + /* Mask off the possible multiple of 2^256 from the final reduction */ + t4 &= 0x0FFFFFFFFFFFFULL; + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif +} + +static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) { + uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; + + /* Reduce t4 at the start so there will be at most a single carry from the first pass */ + uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; + + /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t4 >> 49 == 0); + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + +#ifdef VERIFY + r->magnitude = 1; + secp256k1_fe_verify(r); +#endif +} + +static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) { + uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; + + /* Reduce t4 at the start so there will be at most a single carry from the first pass */ + uint64_t m; + uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL; + + /* The first pass ensures the magnitude is 1, ... 
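(For illustration: x = t4 >> 48 holds everything at or above bit 256, and since 2^256 mod p = 0x1000003D1, adding x * 0x1000003D1 back into t0 below folds that excess into the low limb before the carries are propagated.) 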
*/ + t0 += x * 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3; + + /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t4 >> 49 == 0); + + /* At most a single final reduction is needed; check if the value is >= the field characteristic */ + x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL) + & (t0 >= 0xFFFFEFFFFFC2FULL)); + + if (x) { + t0 += 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; + + /* If t4 didn't carry to bit 48 already, then it should have after any final reduction */ + VERIFY_CHECK(t4 >> 48 == x); + + /* Mask off the possible multiple of 2^256 from the final reduction */ + t4 &= 0x0FFFFFFFFFFFFULL; + } + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif +} + +static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) { + uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; + + /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ + uint64_t z0, z1; + + /* Reduce t4 at the start so there will be at most a single carry from the first pass */ + uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; z0 = t0; z1 = t0 ^ 0x1000003D0ULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3; + z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL; + + /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t4 >> 49 == 0); + + return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); +} + +static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) { + uint64_t t0, t1, t2, t3, t4; + uint64_t z0, z1; + uint64_t x; + + t0 = r->n[0]; + t4 = r->n[4]; + + /* Reduce t4 at the start so there will be at most a single carry from the first pass */ + x = t4 >> 48; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x1000003D1ULL; + + /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ + z0 = t0 & 0xFFFFFFFFFFFFFULL; + z1 = z0 ^ 0x1000003D0ULL; + + /* Fast return path should catch the majority of cases */ + if ((z0 != 0ULL) & (z1 != 0xFFFFFFFFFFFFFULL)) { + return 0; + } + + t1 = r->n[1]; + t2 = r->n[2]; + t3 = r->n[3]; + + t4 &= 0x0FFFFFFFFFFFFULL; + + t1 += (t0 >> 52); t0 = z0; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3; + z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL; + + /* ... except for a possible carry at bit 48 of t4 (i.e. 
bit 256 of the field element) */ + VERIFY_CHECK(t4 >> 49 == 0); + + return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); +} + +SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { + r->n[0] = a; + r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif +} + +SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) { + const uint64_t *t = a->n; +#ifdef VERIFY + VERIFY_CHECK(a->normalized); + secp256k1_fe_verify(a); +#endif + return (t[0] | t[1] | t[2] | t[3] | t[4]) == 0; +} + +SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) { +#ifdef VERIFY + VERIFY_CHECK(a->normalized); + secp256k1_fe_verify(a); +#endif + return a->n[0] & 1; +} + +SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) { + int i; +#ifdef VERIFY + a->magnitude = 0; + a->normalized = 1; +#endif + for (i=0; i<5; i++) { + a->n[i] = 0; + } +} + +static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { + int i; +#ifdef VERIFY + VERIFY_CHECK(a->normalized); + VERIFY_CHECK(b->normalized); + secp256k1_fe_verify(a); + secp256k1_fe_verify(b); +#endif + for (i = 4; i >= 0; i--) { + if (a->n[i] > b->n[i]) { + return 1; + } + if (a->n[i] < b->n[i]) { + return -1; + } + } + return 0; +} + +static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) { + int i; + r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; + for (i=0; i<32; i++) { + int j; + for (j=0; j<2; j++) { + int limb = (8*i+4*j)/52; + int shift = (8*i+4*j)%52; + r->n[limb] |= (uint64_t)((a[31-i] >> (4*j)) & 0xF) << shift; + } + } + if (r->n[4] == 0x0FFFFFFFFFFFFULL && (r->n[3] & r->n[2] & r->n[1]) == 0xFFFFFFFFFFFFFULL && r->n[0] >= 0xFFFFEFFFFFC2FULL) { + return 0; + } +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif + return 1; +} + +/** Convert a field element to a 32-byte big endian value. 
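(For illustration: the loop below writes the 64 hex nibbles by reading 4 bits at a time from limb (8*i+4*j)/52 at shift (8*i+4*j)%52, the mirror image of the indexing used by secp256k1_fe_set_b32 above.) 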
Requires the input to be normalized */ +static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) { + int i; +#ifdef VERIFY + VERIFY_CHECK(a->normalized); + secp256k1_fe_verify(a); +#endif + for (i=0; i<32; i++) { + int j; + int c = 0; + for (j=0; j<2; j++) { + int limb = (8*i+4*j)/52; + int shift = (8*i+4*j)%52; + c |= ((a->n[limb] >> shift) & 0xF) << (4 * j); + } + r[31-i] = c; + } +} + +SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) { +#ifdef VERIFY + VERIFY_CHECK(a->magnitude <= m); + secp256k1_fe_verify(a); +#endif + r->n[0] = 0xFFFFEFFFFFC2FULL * 2 * (m + 1) - a->n[0]; + r->n[1] = 0xFFFFFFFFFFFFFULL * 2 * (m + 1) - a->n[1]; + r->n[2] = 0xFFFFFFFFFFFFFULL * 2 * (m + 1) - a->n[2]; + r->n[3] = 0xFFFFFFFFFFFFFULL * 2 * (m + 1) - a->n[3]; + r->n[4] = 0x0FFFFFFFFFFFFULL * 2 * (m + 1) - a->n[4]; +#ifdef VERIFY + r->magnitude = m + 1; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) { + r->n[0] *= a; + r->n[1] *= a; + r->n[2] *= a; + r->n[3] *= a; + r->n[4] *= a; +#ifdef VERIFY + r->magnitude *= a; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) { +#ifdef VERIFY + secp256k1_fe_verify(a); +#endif + r->n[0] += a->n[0]; + r->n[1] += a->n[1]; + r->n[2] += a->n[2]; + r->n[3] += a->n[3]; + r->n[4] += a->n[4]; +#ifdef VERIFY + r->magnitude += a->magnitude; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) { +#ifdef VERIFY + VERIFY_CHECK(a->magnitude <= 8); + VERIFY_CHECK(b->magnitude <= 8); + secp256k1_fe_verify(a); + secp256k1_fe_verify(b); + VERIFY_CHECK(r != b); +#endif + secp256k1_fe_mul_inner(r->n, a->n, b->n); +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { +#ifdef VERIFY + VERIFY_CHECK(a->magnitude <= 8); + secp256k1_fe_verify(a); +#endif + secp256k1_fe_sqr_inner(r->n, a->n); +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + +static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) { + uint64_t mask0, mask1; + mask0 = flag + ~((uint64_t)0); + mask1 = ~mask0; + r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); + r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); + r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); + r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); + r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1); +#ifdef VERIFY + if (a->magnitude > r->magnitude) { + r->magnitude = a->magnitude; + } + r->normalized &= a->normalized; +#endif +} + +static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) { + uint64_t mask0, mask1; + mask0 = flag + ~((uint64_t)0); + mask1 = ~mask0; + r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); + r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); + r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); + r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); +} + +static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t *a) { +#ifdef VERIFY + VERIFY_CHECK(a->normalized); +#endif + r->n[0] = a->n[0] | a->n[1] << 52; + r->n[1] = a->n[1] >> 12 | a->n[2] << 40; + r->n[2] = a->n[2] >> 24 | a->n[3] 
<< 28; + r->n[3] = a->n[3] >> 36 | a->n[4] << 16; +} + +static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t *a) { + r->n[0] = a->n[0] & 0xFFFFFFFFFFFFFULL; + r->n[1] = a->n[0] >> 52 | ((a->n[1] << 12) & 0xFFFFFFFFFFFFFULL); + r->n[2] = a->n[1] >> 40 | ((a->n[2] << 24) & 0xFFFFFFFFFFFFFULL); + r->n[3] = a->n[2] >> 28 | ((a->n[3] << 36) & 0xFFFFFFFFFFFFFULL); + r->n[4] = a->n[3] >> 16; +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; +#endif +} + +#endif diff --git a/bf/secp256k1/src/field_5x52_int128_impl.h b/bf/secp256k1/src/field_5x52_int128_impl.h new file mode 100644 index 0000000..9280bb5 --- /dev/null +++ b/bf/secp256k1/src/field_5x52_int128_impl.h @@ -0,0 +1,277 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_ +#define _SECP256K1_FIELD_INNER5X52_IMPL_H_ + +#include + +#ifdef VERIFY +#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) +#else +#define VERIFY_BITS(x, n) do { } while(0) +#endif + +SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) { + uint128_t c, d; + uint64_t t3, t4, tx, u0; + uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4]; + const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; + + VERIFY_BITS(a[0], 56); + VERIFY_BITS(a[1], 56); + VERIFY_BITS(a[2], 56); + VERIFY_BITS(a[3], 56); + VERIFY_BITS(a[4], 52); + VERIFY_BITS(b[0], 56); + VERIFY_BITS(b[1], 56); + VERIFY_BITS(b[2], 56); + VERIFY_BITS(b[3], 56); + VERIFY_BITS(b[4], 52); + VERIFY_CHECK(r != b); + + /* [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n. + * px is a shorthand for sum(a[i]*b[x-i], i=0..x). + * Note that [x 0 0 0 0 0] = [x*R]. 
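+ * (For illustration, with the constants defined above: a value x in the sixth limb
+ * position stands for x*2^260, and 2^260 mod p = 0x1000003D10 = R, which is why an
+ * overflow limb can be folded back in with the (c & M) * R and d * R terms seen below.)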
+ */ + + d = (uint128_t)a0 * b[3] + + (uint128_t)a1 * b[2] + + (uint128_t)a2 * b[1] + + (uint128_t)a3 * b[0]; + VERIFY_BITS(d, 114); + /* [d 0 0 0] = [p3 0 0 0] */ + c = (uint128_t)a4 * b[4]; + VERIFY_BITS(c, 112); + /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ + d += (c & M) * R; c >>= 52; + VERIFY_BITS(d, 115); + VERIFY_BITS(c, 60); + /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ + t3 = d & M; d >>= 52; + VERIFY_BITS(t3, 52); + VERIFY_BITS(d, 63); + /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ + + d += (uint128_t)a0 * b[4] + + (uint128_t)a1 * b[3] + + (uint128_t)a2 * b[2] + + (uint128_t)a3 * b[1] + + (uint128_t)a4 * b[0]; + VERIFY_BITS(d, 115); + /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ + d += c * R; + VERIFY_BITS(d, 116); + /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ + t4 = d & M; d >>= 52; + VERIFY_BITS(t4, 52); + VERIFY_BITS(d, 64); + /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ + tx = (t4 >> 48); t4 &= (M >> 4); + VERIFY_BITS(tx, 4); + VERIFY_BITS(t4, 48); + /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ + + c = (uint128_t)a0 * b[0]; + VERIFY_BITS(c, 112); + /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */ + d += (uint128_t)a1 * b[4] + + (uint128_t)a2 * b[3] + + (uint128_t)a3 * b[2] + + (uint128_t)a4 * b[1]; + VERIFY_BITS(d, 115); + /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + u0 = d & M; d >>= 52; + VERIFY_BITS(u0, 52); + VERIFY_BITS(d, 63); + /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + u0 = (u0 << 4) | tx; + VERIFY_BITS(u0, 56); + /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + c += (uint128_t)u0 * (R >> 4); + VERIFY_BITS(c, 115); + /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + r[0] = c & M; c >>= 52; + VERIFY_BITS(r[0], 52); + VERIFY_BITS(c, 61); + /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */ + + c += (uint128_t)a0 * b[1] + + (uint128_t)a1 * b[0]; + VERIFY_BITS(c, 114); + /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ + d += (uint128_t)a2 * b[4] + + (uint128_t)a3 * b[3] + + (uint128_t)a4 * b[2]; + VERIFY_BITS(d, 114); + /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + c += (d & M) * R; d >>= 52; + VERIFY_BITS(c, 115); + VERIFY_BITS(d, 62); + /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + r[1] = c & M; c >>= 52; + VERIFY_BITS(r[1], 52); + VERIFY_BITS(c, 63); + /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + + c += (uint128_t)a0 * b[2] + + (uint128_t)a1 * b[1] + + (uint128_t)a2 * b[0]; + VERIFY_BITS(c, 114); + /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ + d += (uint128_t)a3 * b[4] + + (uint128_t)a4 * b[3]; + VERIFY_BITS(d, 114); + /* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + c += (d & M) * R; d >>= 52; + VERIFY_BITS(c, 115); + VERIFY_BITS(d, 62); + /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + + /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[2] = c & M; c >>= 52; + VERIFY_BITS(r[2], 52); + VERIFY_BITS(c, 63); + /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + c += d * R + t3;; + VERIFY_BITS(c, 100); + /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[3] = c & M; c >>= 52; + VERIFY_BITS(r[3], 52); + VERIFY_BITS(c, 48); + /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + c += t4; + VERIFY_BITS(c, 49); + /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[4] = c; + VERIFY_BITS(r[4], 49); + /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ +} + +SECP256K1_INLINE 
static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) { + uint128_t c, d; + uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4]; + int64_t t3, t4, tx, u0; + const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; + + VERIFY_BITS(a[0], 56); + VERIFY_BITS(a[1], 56); + VERIFY_BITS(a[2], 56); + VERIFY_BITS(a[3], 56); + VERIFY_BITS(a[4], 52); + + /** [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n. + * px is a shorthand for sum(a[i]*a[x-i], i=0..x). + * Note that [x 0 0 0 0 0] = [x*R]. + */ + + d = (uint128_t)(a0*2) * a3 + + (uint128_t)(a1*2) * a2; + VERIFY_BITS(d, 114); + /* [d 0 0 0] = [p3 0 0 0] */ + c = (uint128_t)a4 * a4; + VERIFY_BITS(c, 112); + /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ + d += (c & M) * R; c >>= 52; + VERIFY_BITS(d, 115); + VERIFY_BITS(c, 60); + /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ + t3 = d & M; d >>= 52; + VERIFY_BITS(t3, 52); + VERIFY_BITS(d, 63); + /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ + + a4 *= 2; + d += (uint128_t)a0 * a4 + + (uint128_t)(a1*2) * a3 + + (uint128_t)a2 * a2; + VERIFY_BITS(d, 115); + /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ + d += c * R; + VERIFY_BITS(d, 116); + /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ + t4 = d & M; d >>= 52; + VERIFY_BITS(t4, 52); + VERIFY_BITS(d, 64); + /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ + tx = (t4 >> 48); t4 &= (M >> 4); + VERIFY_BITS(tx, 4); + VERIFY_BITS(t4, 48); + /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ + + c = (uint128_t)a0 * a0; + VERIFY_BITS(c, 112); + /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */ + d += (uint128_t)a1 * a4 + + (uint128_t)(a2*2) * a3; + VERIFY_BITS(d, 114); + /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + u0 = d & M; d >>= 52; + VERIFY_BITS(u0, 52); + VERIFY_BITS(d, 62); + /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + u0 = (u0 << 4) | tx; + VERIFY_BITS(u0, 56); + /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + c += (uint128_t)u0 * (R >> 4); + VERIFY_BITS(c, 113); + /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ + r[0] = c & M; c >>= 52; + VERIFY_BITS(r[0], 52); + VERIFY_BITS(c, 61); + /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */ + + a0 *= 2; + c += (uint128_t)a0 * a1; + VERIFY_BITS(c, 114); + /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ + d += (uint128_t)a2 * a4 + + (uint128_t)a3 * a3; + VERIFY_BITS(d, 114); + /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + c += (d & M) * R; d >>= 52; + VERIFY_BITS(c, 115); + VERIFY_BITS(d, 62); + /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + r[1] = c & M; c >>= 52; + VERIFY_BITS(r[1], 52); + VERIFY_BITS(c, 63); + /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + + c += (uint128_t)a0 * a2 + + (uint128_t)a1 * a1; + VERIFY_BITS(c, 114); + /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ + d += (uint128_t)a3 * a4; + VERIFY_BITS(d, 114); + /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + c += (d & M) * R; d >>= 52; + VERIFY_BITS(c, 115); + VERIFY_BITS(d, 62); + /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[2] = c & M; c >>= 52; + VERIFY_BITS(r[2], 52); + VERIFY_BITS(c, 63); + /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + + c += d * R + t3;; + VERIFY_BITS(c, 100); + /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[3] = c & M; c >>= 52; + VERIFY_BITS(r[3], 52); + VERIFY_BITS(c, 48); + /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 
p3 p2 p1 p0] */ + c += t4; + VERIFY_BITS(c, 49); + /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + r[4] = c; + VERIFY_BITS(r[4], 49); + /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ +} + +#endif diff --git a/bf/secp256k1/src/field_impl.h b/bf/secp256k1/src/field_impl.h new file mode 100644 index 0000000..6ccbf21 --- /dev/null +++ b/bf/secp256k1/src/field_impl.h @@ -0,0 +1,271 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_FIELD_IMPL_H_ +#define _SECP256K1_FIELD_IMPL_H_ + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +#include "util.h" + +#if defined(USE_FIELD_10X26) +#include "field_10x26_impl.h" +#elif defined(USE_FIELD_5X52) +#include "field_5x52_impl.h" +#else +#error "Please select field implementation" +#endif + +SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { + secp256k1_fe_t na; + secp256k1_fe_negate(&na, a, 1); + secp256k1_fe_add(&na, b); + return secp256k1_fe_normalizes_to_zero_var(&na); +} + +static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) { + secp256k1_fe_t x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1; + int j; + + /** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in + * { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block: + * 1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223] + */ + + secp256k1_fe_sqr(&x2, a); + secp256k1_fe_mul(&x2, &x2, a); + + secp256k1_fe_sqr(&x3, &x2); + secp256k1_fe_mul(&x3, &x3, a); + + x6 = x3; + for (j=0; j<3; j++) { + secp256k1_fe_sqr(&x6, &x6); + } + secp256k1_fe_mul(&x6, &x6, &x3); + + x9 = x6; + for (j=0; j<3; j++) { + secp256k1_fe_sqr(&x9, &x9); + } + secp256k1_fe_mul(&x9, &x9, &x3); + + x11 = x9; + for (j=0; j<2; j++) { + secp256k1_fe_sqr(&x11, &x11); + } + secp256k1_fe_mul(&x11, &x11, &x2); + + x22 = x11; + for (j=0; j<11; j++) { + secp256k1_fe_sqr(&x22, &x22); + } + secp256k1_fe_mul(&x22, &x22, &x11); + + x44 = x22; + for (j=0; j<22; j++) { + secp256k1_fe_sqr(&x44, &x44); + } + secp256k1_fe_mul(&x44, &x44, &x22); + + x88 = x44; + for (j=0; j<44; j++) { + secp256k1_fe_sqr(&x88, &x88); + } + secp256k1_fe_mul(&x88, &x88, &x44); + + x176 = x88; + for (j=0; j<88; j++) { + secp256k1_fe_sqr(&x176, &x176); + } + secp256k1_fe_mul(&x176, &x176, &x88); + + x220 = x176; + for (j=0; j<44; j++) { + secp256k1_fe_sqr(&x220, &x220); + } + secp256k1_fe_mul(&x220, &x220, &x44); + + x223 = x220; + for (j=0; j<3; j++) { + secp256k1_fe_sqr(&x223, &x223); + } + secp256k1_fe_mul(&x223, &x223, &x3); + + /* The final result is then assembled using a sliding window over the blocks. 
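For illustration: the squarings and multiplications below raise a to the power (p + 1)/4 = 2^254 - 2^30 - 244; since p mod 4 == 3, that value is a square root of a whenever a is a quadratic residue, which the final check at the end confirms. 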
*/ + + t1 = x223; + for (j=0; j<23; j++) { + secp256k1_fe_sqr(&t1, &t1); + } + secp256k1_fe_mul(&t1, &t1, &x22); + for (j=0; j<6; j++) { + secp256k1_fe_sqr(&t1, &t1); + } + secp256k1_fe_mul(&t1, &t1, &x2); + secp256k1_fe_sqr(&t1, &t1); + secp256k1_fe_sqr(r, &t1); + + /* Check that a square root was actually calculated */ + + secp256k1_fe_sqr(&t1, r); + return secp256k1_fe_equal_var(&t1, a); +} + +static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a) { + secp256k1_fe_t x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1; + int j; + + /** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in + * { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block: + * [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223] + */ + + secp256k1_fe_sqr(&x2, a); + secp256k1_fe_mul(&x2, &x2, a); + + secp256k1_fe_sqr(&x3, &x2); + secp256k1_fe_mul(&x3, &x3, a); + + x6 = x3; + for (j=0; j<3; j++) { + secp256k1_fe_sqr(&x6, &x6); + } + secp256k1_fe_mul(&x6, &x6, &x3); + + x9 = x6; + for (j=0; j<3; j++) { + secp256k1_fe_sqr(&x9, &x9); + } + secp256k1_fe_mul(&x9, &x9, &x3); + + x11 = x9; + for (j=0; j<2; j++) { + secp256k1_fe_sqr(&x11, &x11); + } + secp256k1_fe_mul(&x11, &x11, &x2); + + x22 = x11; + for (j=0; j<11; j++) { + secp256k1_fe_sqr(&x22, &x22); + } + secp256k1_fe_mul(&x22, &x22, &x11); + + x44 = x22; + for (j=0; j<22; j++) { + secp256k1_fe_sqr(&x44, &x44); + } + secp256k1_fe_mul(&x44, &x44, &x22); + + x88 = x44; + for (j=0; j<44; j++) { + secp256k1_fe_sqr(&x88, &x88); + } + secp256k1_fe_mul(&x88, &x88, &x44); + + x176 = x88; + for (j=0; j<88; j++) { + secp256k1_fe_sqr(&x176, &x176); + } + secp256k1_fe_mul(&x176, &x176, &x88); + + x220 = x176; + for (j=0; j<44; j++) { + secp256k1_fe_sqr(&x220, &x220); + } + secp256k1_fe_mul(&x220, &x220, &x44); + + x223 = x220; + for (j=0; j<3; j++) { + secp256k1_fe_sqr(&x223, &x223); + } + secp256k1_fe_mul(&x223, &x223, &x3); + + /* The final result is then assembled using a sliding window over the blocks. */ + + t1 = x223; + for (j=0; j<23; j++) { + secp256k1_fe_sqr(&t1, &t1); + } + secp256k1_fe_mul(&t1, &t1, &x22); + for (j=0; j<5; j++) { + secp256k1_fe_sqr(&t1, &t1); + } + secp256k1_fe_mul(&t1, &t1, a); + for (j=0; j<3; j++) { + secp256k1_fe_sqr(&t1, &t1); + } + secp256k1_fe_mul(&t1, &t1, &x2); + for (j=0; j<2; j++) { + secp256k1_fe_sqr(&t1, &t1); + } + secp256k1_fe_mul(r, a, &t1); +} + +static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) { +#if defined(USE_FIELD_INV_BUILTIN) + secp256k1_fe_inv(r, a); +#elif defined(USE_FIELD_INV_NUM) + secp256k1_num_t n, m; + static const secp256k1_fe_t negone = SECP256K1_FE_CONST( + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFC2E + ); + /* secp256k1 field prime, value p defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1. */ + static const unsigned char prime[32] = { + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F + }; + unsigned char b[32]; + secp256k1_fe_t c = *a; + secp256k1_fe_normalize_var(&c); + secp256k1_fe_get_b32(b, &c); + secp256k1_num_set_bin(&n, b, 32); + secp256k1_num_set_bin(&m, prime, 32); + secp256k1_num_mod_inverse(&n, &n, &m); + secp256k1_num_get_bin(b, 32, &n); + VERIFY_CHECK(secp256k1_fe_set_b32(r, b)); + /* Verify the result is the (unique) valid inverse using non-GMP code. 
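For illustration: negone above is p - 1, i.e. -1 mod p, so multiplying the normalized copy of the input by the computed inverse and then adding negone gives a value that normalizes to zero exactly when the product equals 1. 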
*/ + secp256k1_fe_mul(&c, &c, r); + secp256k1_fe_add(&c, &negone); + CHECK(secp256k1_fe_normalizes_to_zero_var(&c)); +#else +#error "Please select field inverse implementation" +#endif +} + +static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t *r, const secp256k1_fe_t *a) { + secp256k1_fe_t u; + size_t i; + if (len < 1) { + return; + } + + VERIFY_CHECK((r + len <= a) || (a + len <= r)); + + r[0] = a[0]; + + i = 0; + while (++i < len) { + secp256k1_fe_mul(&r[i], &r[i - 1], &a[i]); + } + + secp256k1_fe_inv_var(&u, &r[--i]); + + while (i > 0) { + int j = i--; + secp256k1_fe_mul(&r[j], &r[i], &u); + secp256k1_fe_mul(&u, &u, &a[j]); + } + + r[0] = u; +} + +#endif diff --git a/bf/secp256k1/src/gen_context.c b/bf/secp256k1/src/gen_context.c new file mode 100644 index 0000000..e5c987d --- /dev/null +++ b/bf/secp256k1/src/gen_context.c @@ -0,0 +1,74 @@ +/********************************************************************** + * Copyright (c) 2013, 2014, 2015 Thomas Daede, Cory Fields * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#define USE_BASIC_CONFIG 1 + +#include "basic-config.h" +#include "include/secp256k1.h" +#include "field_impl.h" +#include "scalar_impl.h" +#include "group_impl.h" +#include "ecmult_gen_impl.h" + +static void default_error_callback_fn(const char* str, void* data) { + (void)data; + fprintf(stderr, "[libsecp256k1] internal consistency check failed: %s\n", str); + abort(); +} + +static const callback_t default_error_callback = { + default_error_callback_fn, + NULL +}; + +int main(int argc, char **argv) { + secp256k1_ecmult_gen_context_t ctx; + int inner; + int outer; + FILE* fp; + + (void)argc; + (void)argv; + + fp = fopen("src/ecmult_static_context.h","w"); + if (fp == NULL) { + fprintf(stderr, "Could not open src/ecmult_static_context.h for writing!\n"); + return -1; + } + + fprintf(fp, "#ifndef _SECP256K1_ECMULT_STATIC_CONTEXT_\n"); + fprintf(fp, "#define _SECP256K1_ECMULT_STATIC_CONTEXT_\n"); + fprintf(fp, "#include \"group.h\"\n"); + fprintf(fp, "#define SC SECP256K1_GE_STORAGE_CONST\n"); + fprintf(fp, "static const secp256k1_ge_storage_t secp256k1_ecmult_static_context[64][16] = {\n"); + + secp256k1_ecmult_gen_context_init(&ctx); + secp256k1_ecmult_gen_context_build(&ctx, &default_error_callback); + for(outer = 0; outer != 64; outer++) { + fprintf(fp,"{\n"); + for(inner = 0; inner != 16; inner++) { + fprintf(fp," SC(%uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu)", SECP256K1_GE_STORAGE_CONST_GET((*ctx.prec)[outer][inner])); + if (inner != 15) { + fprintf(fp,",\n"); + } else { + fprintf(fp,"\n"); + } + } + if (outer != 63) { + fprintf(fp,"},\n"); + } else { + fprintf(fp,"}\n"); + } + } + fprintf(fp,"};\n"); + secp256k1_ecmult_gen_context_clear(&ctx); + + fprintf(fp, "#undef SC\n"); + fprintf(fp, "#endif\n"); + fclose(fp); + + return 0; +} diff --git a/bf/secp256k1/src/group.h b/bf/secp256k1/src/group.h new file mode 100644 index 0000000..4839891 --- /dev/null +++ b/bf/secp256k1/src/group.h @@ -0,0 +1,141 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef 
_SECP256K1_GROUP_ +#define _SECP256K1_GROUP_ + +#include "num.h" +#include "field.h" + +/** A group element of the secp256k1 curve, in affine coordinates. */ +typedef struct { + secp256k1_fe_t x; + secp256k1_fe_t y; + int infinity; /* whether this represents the point at infinity */ +} secp256k1_ge_t; + +#define SECP256K1_GE_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_CONST((i),(j),(k),(l),(m),(n),(o),(p)), 0} +#define SECP256K1_GE_CONST_INFINITY {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), 1} + +/** A group element of the secp256k1 curve, in jacobian coordinates. */ +typedef struct { + secp256k1_fe_t x; /* actual X: x/z^2 */ + secp256k1_fe_t y; /* actual Y: y/z^3 */ + secp256k1_fe_t z; + int infinity; /* whether this represents the point at infinity */ +} secp256k1_gej_t; + +#define SECP256K1_GEJ_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_CONST((i),(j),(k),(l),(m),(n),(o),(p)), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1), 0} +#define SECP256K1_GEJ_CONST_INFINITY {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), 1} + +typedef struct { + secp256k1_fe_storage_t x; + secp256k1_fe_storage_t y; +} secp256k1_ge_storage_t; + +#define SECP256K1_GE_STORAGE_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_STORAGE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_STORAGE_CONST((i),(j),(k),(l),(m),(n),(o),(p))} + +#define SECP256K1_GE_STORAGE_CONST_GET(t) SECP256K1_FE_STORAGE_CONST_GET(t.x), SECP256K1_FE_STORAGE_CONST_GET(t.y) + +/** Set a group element equal to the point at infinity */ +static void secp256k1_ge_set_infinity(secp256k1_ge_t *r); + +/** Set a group element equal to the point with given X and Y coordinates */ +static void secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y); + +/** Set a group element (affine) equal to the point with the given X coordinate, and given oddness + * for Y. Return value indicates whether the result is valid. */ +static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd); + +/** Check whether a group element is the point at infinity. */ +static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a); + +/** Check whether a group element is valid (i.e., on the curve). */ +static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a); + +static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a); + +/** Set a group element equal to another which is given in jacobian coordinates */ +static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a); + +/** Set a batch of group elements equal to the inputs given in jacobian coordinates */ +static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a, const callback_t *cb); + +/** Set a batch of group elements equal to the inputs given in jacobian + * coordinates (with known z-ratios). zr must contain the known z-ratios such + * that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. */ +static void secp256k1_ge_set_table_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a, const secp256k1_fe_t *zr); + +/** Bring a batch inputs given in jacobian coordinates (with known z-ratios) to + * the same global z "denominator". 
zr must contain the known z-ratios such + * that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. The x and y + * coordinates of the result are stored in r, the common z coordinate is + * stored in globalz. */ +static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge_t *r, secp256k1_fe_t *globalz, const secp256k1_gej_t *a, const secp256k1_fe_t *zr); + +/** Set a group element (jacobian) equal to the point at infinity. */ +static void secp256k1_gej_set_infinity(secp256k1_gej_t *r); + +/** Set a group element (jacobian) equal to the point with given X and Y coordinates. */ +static void secp256k1_gej_set_xy(secp256k1_gej_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y); + +/** Set a group element (jacobian) equal to another which is given in affine coordinates. */ +static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a); + +/** Compare the X coordinate of a group element (jacobian). */ +static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t *a); + +/** Set r equal to the inverse of a (i.e., mirrored around the X axis) */ +static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a); + +/** Check whether a group element is the point at infinity. */ +static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a); + +/** Set r equal to the double of a. If rzr is not-NULL, r->z = a->z * *rzr (where infinity means an implicit z = 0). + * a may not be zero. Constant time. */ +static void secp256k1_gej_double_nonzero(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t *rzr); + +/** Set r equal to the double of a. If rzr is not-NULL, r->z = a->z * *rzr (where infinity means an implicit z = 0). */ +static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t *rzr); + +/** Set r equal to the sum of a and b. If rzr is non-NULL, r->z = a->z * *rzr (a cannot be infinity in that case). */ +static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b, secp256k1_fe_t *rzr); + +/** Set r equal to the sum of a and b (with b given in affine coordinates, and not infinity). */ +static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b); + +/** Set r equal to the sum of a and b (with b given in affine coordinates). This is more efficient + than secp256k1_gej_add_var. It is identical to secp256k1_gej_add_ge but without constant-time + guarantee, and b is allowed to be infinity. If rzr is non-NULL, r->z = a->z * *rzr (a cannot be infinity in that case). */ +static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, secp256k1_fe_t *rzr); + +/** Set r equal to the sum of a and b (with the inverse of b's Z coordinate passed as bzinv). */ +static void secp256k1_gej_add_zinv_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, const secp256k1_fe_t *bzinv); + +#ifdef USE_ENDOMORPHISM +/** Set r to be equal to lambda times a, where lambda is chosen in a way such that this is very fast. */ +static void secp256k1_ge_mul_lambda(secp256k1_ge_t *r, const secp256k1_ge_t *a); +#endif + +/** Clear a secp256k1_gej_t to prevent leaking sensitive information. */ +static void secp256k1_gej_clear(secp256k1_gej_t *r); + +/** Clear a secp256k1_ge_t to prevent leaking sensitive information. */ +static void secp256k1_ge_clear(secp256k1_ge_t *r); + +/** Convert a group element to the storage type. 
*/ +static void secp256k1_ge_to_storage(secp256k1_ge_storage_t *r, const secp256k1_ge_t*); + +/** Convert a group element back from the storage type. */ +static void secp256k1_ge_from_storage(secp256k1_ge_t *r, const secp256k1_ge_storage_t*); + +/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */ +static void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r, const secp256k1_ge_storage_t *a, int flag); + +/** Rescale a jacobian point by b which must be non-zero. Constant-time. */ +static void secp256k1_gej_rescale(secp256k1_gej_t *r, const secp256k1_fe_t *b); + +#endif diff --git a/bf/secp256k1/src/group_impl.h b/bf/secp256k1/src/group_impl.h new file mode 100644 index 0000000..165eba2 --- /dev/null +++ b/bf/secp256k1/src/group_impl.h @@ -0,0 +1,634 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_GROUP_IMPL_H_ +#define _SECP256K1_GROUP_IMPL_H_ + +#include + +#include "num.h" +#include "field.h" +#include "group.h" + +/** Generator for secp256k1, value 'g' defined in + * "Standards for Efficient Cryptography" (SEC2) 2.7.1. + */ +static const secp256k1_ge_t secp256k1_ge_const_g = SECP256K1_GE_CONST( + 0x79BE667EUL, 0xF9DCBBACUL, 0x55A06295UL, 0xCE870B07UL, + 0x029BFCDBUL, 0x2DCE28D9UL, 0x59F2815BUL, 0x16F81798UL, + 0x483ADA77UL, 0x26A3C465UL, 0x5DA4FBFCUL, 0x0E1108A8UL, + 0xFD17B448UL, 0xA6855419UL, 0x9C47D08FUL, 0xFB10D4B8UL +); + +static void secp256k1_ge_set_gej_zinv(secp256k1_ge_t *r, const secp256k1_gej_t *a, const secp256k1_fe_t *zi) { + secp256k1_fe_t zi2; + secp256k1_fe_t zi3; + secp256k1_fe_sqr(&zi2, zi); + secp256k1_fe_mul(&zi3, &zi2, zi); + secp256k1_fe_mul(&r->x, &a->x, &zi2); + secp256k1_fe_mul(&r->y, &a->y, &zi3); + r->infinity = a->infinity; +} + +static void secp256k1_ge_set_infinity(secp256k1_ge_t *r) { + r->infinity = 1; +} + +static void secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y) { + r->infinity = 0; + r->x = *x; + r->y = *y; +} + +static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a) { + return a->infinity; +} + +static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a) { + *r = *a; + secp256k1_fe_normalize_weak(&r->y); + secp256k1_fe_negate(&r->y, &r->y, 1); +} + +static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a) { + secp256k1_fe_t z2, z3; + r->infinity = a->infinity; + secp256k1_fe_inv(&a->z, &a->z); + secp256k1_fe_sqr(&z2, &a->z); + secp256k1_fe_mul(&z3, &a->z, &z2); + secp256k1_fe_mul(&a->x, &a->x, &z2); + secp256k1_fe_mul(&a->y, &a->y, &z3); + secp256k1_fe_set_int(&a->z, 1); + r->x = a->x; + r->y = a->y; +} + +static void secp256k1_ge_set_gej_var(secp256k1_ge_t *r, secp256k1_gej_t *a) { + secp256k1_fe_t z2, z3; + r->infinity = a->infinity; + if (a->infinity) { + return; + } + secp256k1_fe_inv_var(&a->z, &a->z); + secp256k1_fe_sqr(&z2, &a->z); + secp256k1_fe_mul(&z3, &a->z, &z2); + secp256k1_fe_mul(&a->x, &a->x, &z2); + secp256k1_fe_mul(&a->y, &a->y, &z3); + secp256k1_fe_set_int(&a->z, 1); + r->x = a->x; + r->y = a->y; +} + +static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a, const callback_t *cb) { + secp256k1_fe_t *az; + secp256k1_fe_t *azi; + size_t i; + size_t count = 0; + az = (secp256k1_fe_t 
*)checked_malloc(cb, sizeof(secp256k1_fe_t) * len); + for (i = 0; i < len; i++) { + if (!a[i].infinity) { + az[count++] = a[i].z; + } + } + + azi = (secp256k1_fe_t *)checked_malloc(cb, sizeof(secp256k1_fe_t) * count); + secp256k1_fe_inv_all_var(count, azi, az); + free(az); + + count = 0; + for (i = 0; i < len; i++) { + r[i].infinity = a[i].infinity; + if (!a[i].infinity) { + secp256k1_ge_set_gej_zinv(&r[i], &a[i], &azi[count++]); + } + } + free(azi); +} + +static void secp256k1_ge_set_table_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a, const secp256k1_fe_t *zr) { + size_t i = len - 1; + secp256k1_fe_t zi; + + if (len < 1) + return; + + /* Compute the inverse of the last z coordinate, and use it to compute the last affine output. */ + secp256k1_fe_inv(&zi, &a[i].z); + secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi); + + /* Work out way backwards, using the z-ratios to scale the x/y values. */ + while (i > 0) { + secp256k1_fe_mul(&zi, &zi, &zr[i]); + i--; + secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi); + } +} + +static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge_t *r, secp256k1_fe_t *globalz, const secp256k1_gej_t *a, const secp256k1_fe_t *zr) { + size_t i = len - 1; + secp256k1_fe_t zs; + + if (len < 1) + return; + + /* The z of the final point gives us the "global Z" for the table. */ + r[i].x = a[i].x; + r[i].y = a[i].y; + *globalz = a[i].z; + r[i].infinity = 0; + zs = zr[i]; + + /* Work our way backwards, using the z-ratios to scale the x/y values. */ + while (i > 0) { + if (i != len - 1) { + secp256k1_fe_mul(&zs, &zs, &zr[i]); + } + i--; + secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zs); + } +} + +static void secp256k1_gej_set_infinity(secp256k1_gej_t *r) { + r->infinity = 1; + secp256k1_fe_set_int(&r->x, 0); + secp256k1_fe_set_int(&r->y, 0); + secp256k1_fe_set_int(&r->z, 0); +} + +static void secp256k1_gej_set_xy(secp256k1_gej_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y) { + r->infinity = 0; + r->x = *x; + r->y = *y; + secp256k1_fe_set_int(&r->z, 1); +} + +static void secp256k1_gej_clear(secp256k1_gej_t *r) { + r->infinity = 0; + secp256k1_fe_clear(&r->x); + secp256k1_fe_clear(&r->y); + secp256k1_fe_clear(&r->z); +} + +static void secp256k1_ge_clear(secp256k1_ge_t *r) { + r->infinity = 0; + secp256k1_fe_clear(&r->x); + secp256k1_fe_clear(&r->y); +} + +static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd) { + secp256k1_fe_t x2, x3, c; + r->x = *x; + secp256k1_fe_sqr(&x2, x); + secp256k1_fe_mul(&x3, x, &x2); + r->infinity = 0; + secp256k1_fe_set_int(&c, 7); + secp256k1_fe_add(&c, &x3); + if (!secp256k1_fe_sqrt_var(&r->y, &c)) { + return 0; + } + secp256k1_fe_normalize_var(&r->y); + if (secp256k1_fe_is_odd(&r->y) != odd) { + secp256k1_fe_negate(&r->y, &r->y, 1); + } + return 1; +} + +static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a) { + r->infinity = a->infinity; + r->x = a->x; + r->y = a->y; + secp256k1_fe_set_int(&r->z, 1); +} + +static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t *a) { + secp256k1_fe_t r, r2; + VERIFY_CHECK(!a->infinity); + secp256k1_fe_sqr(&r, &a->z); secp256k1_fe_mul(&r, &r, x); + r2 = a->x; secp256k1_fe_normalize_weak(&r2); + return secp256k1_fe_equal_var(&r, &r2); +} + +static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a) { + r->infinity = a->infinity; + r->x = a->x; + r->y = a->y; + r->z = a->z; + secp256k1_fe_normalize_weak(&r->y); + secp256k1_fe_negate(&r->y, &r->y, 1); +} + +static int 
secp256k1_gej_is_infinity(const secp256k1_gej_t *a) { + return a->infinity; +} + +static int secp256k1_gej_is_valid_var(const secp256k1_gej_t *a) { + secp256k1_fe_t y2, x3, z2, z6; + if (a->infinity) { + return 0; + } + /** y^2 = x^3 + 7 + * (Y/Z^3)^2 = (X/Z^2)^3 + 7 + * Y^2 / Z^6 = X^3 / Z^6 + 7 + * Y^2 = X^3 + 7*Z^6 + */ + secp256k1_fe_sqr(&y2, &a->y); + secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x); + secp256k1_fe_sqr(&z2, &a->z); + secp256k1_fe_sqr(&z6, &z2); secp256k1_fe_mul(&z6, &z6, &z2); + secp256k1_fe_mul_int(&z6, 7); + secp256k1_fe_add(&x3, &z6); + secp256k1_fe_normalize_weak(&x3); + return secp256k1_fe_equal_var(&y2, &x3); +} + +static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a) { + secp256k1_fe_t y2, x3, c; + if (a->infinity) { + return 0; + } + /* y^2 = x^3 + 7 */ + secp256k1_fe_sqr(&y2, &a->y); + secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x); + secp256k1_fe_set_int(&c, 7); + secp256k1_fe_add(&x3, &c); + secp256k1_fe_normalize_weak(&x3); + return secp256k1_fe_equal_var(&y2, &x3); +} + +static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t *rzr) { + /* Operations: 3 mul, 4 sqr, 0 normalize, 12 mul_int/add/negate */ + secp256k1_fe_t t1,t2,t3,t4; + /** For secp256k1, 2Q is infinity if and only if Q is infinity. This is because if 2Q = infinity, + * Q must equal -Q, or that Q.y == -(Q.y), or Q.y is 0. For a point on y^2 = x^3 + 7 to have + * y=0, x^3 must be -7 mod p. However, -7 has no cube root mod p. + */ + r->infinity = a->infinity; + if (r->infinity) { + if (rzr) { + secp256k1_fe_set_int(rzr, 1); + } + return; + } + + if (rzr) { + *rzr = a->y; + secp256k1_fe_normalize_weak(rzr); + secp256k1_fe_mul_int(rzr, 2); + } + + secp256k1_fe_mul(&r->z, &a->z, &a->y); + secp256k1_fe_mul_int(&r->z, 2); /* Z' = 2*Y*Z (2) */ + secp256k1_fe_sqr(&t1, &a->x); + secp256k1_fe_mul_int(&t1, 3); /* T1 = 3*X^2 (3) */ + secp256k1_fe_sqr(&t2, &t1); /* T2 = 9*X^4 (1) */ + secp256k1_fe_sqr(&t3, &a->y); + secp256k1_fe_mul_int(&t3, 2); /* T3 = 2*Y^2 (2) */ + secp256k1_fe_sqr(&t4, &t3); + secp256k1_fe_mul_int(&t4, 2); /* T4 = 8*Y^4 (2) */ + secp256k1_fe_mul(&t3, &t3, &a->x); /* T3 = 2*X*Y^2 (1) */ + r->x = t3; + secp256k1_fe_mul_int(&r->x, 4); /* X' = 8*X*Y^2 (4) */ + secp256k1_fe_negate(&r->x, &r->x, 4); /* X' = -8*X*Y^2 (5) */ + secp256k1_fe_add(&r->x, &t2); /* X' = 9*X^4 - 8*X*Y^2 (6) */ + secp256k1_fe_negate(&t2, &t2, 1); /* T2 = -9*X^4 (2) */ + secp256k1_fe_mul_int(&t3, 6); /* T3 = 12*X*Y^2 (6) */ + secp256k1_fe_add(&t3, &t2); /* T3 = 12*X*Y^2 - 9*X^4 (8) */ + secp256k1_fe_mul(&r->y, &t1, &t3); /* Y' = 36*X^3*Y^2 - 27*X^6 (1) */ + secp256k1_fe_negate(&t2, &t4, 2); /* T2 = -8*Y^4 (3) */ + secp256k1_fe_add(&r->y, &t2); /* Y' = 36*X^3*Y^2 - 27*X^6 - 8*Y^4 (4) */ +} + +static SECP256K1_INLINE void secp256k1_gej_double_nonzero(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t *rzr) { + VERIFY_CHECK(!secp256k1_gej_is_infinity(a)); + secp256k1_gej_double_var(r, a, rzr); +} + +static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b, secp256k1_fe_t *rzr) { + /* Operations: 12 mul, 4 sqr, 2 normalize, 12 mul_int/add/negate */ + secp256k1_fe_t z22, z12, u1, u2, s1, s2, h, i, i2, h2, h3, t; + + if (a->infinity) { + VERIFY_CHECK(rzr == NULL); + *r = *b; + return; + } + + if (b->infinity) { + if (rzr) { + secp256k1_fe_set_int(rzr, 1); + } + *r = *a; + return; + } + + r->infinity = 0; + secp256k1_fe_sqr(&z22, &b->z); + secp256k1_fe_sqr(&z12, &a->z); + 
secp256k1_fe_mul(&u1, &a->x, &z22); + secp256k1_fe_mul(&u2, &b->x, &z12); + secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z); + secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); + secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); + secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); + if (secp256k1_fe_normalizes_to_zero_var(&h)) { + if (secp256k1_fe_normalizes_to_zero_var(&i)) { + secp256k1_gej_double_var(r, a, rzr); + } else { + if (rzr) { + secp256k1_fe_set_int(rzr, 0); + } + r->infinity = 1; + } + return; + } + secp256k1_fe_sqr(&i2, &i); + secp256k1_fe_sqr(&h2, &h); + secp256k1_fe_mul(&h3, &h, &h2); + secp256k1_fe_mul(&h, &h, &b->z); + if (rzr) { + *rzr = h; + } + secp256k1_fe_mul(&r->z, &a->z, &h); + secp256k1_fe_mul(&t, &u1, &h2); + r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2); + secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i); + secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1); + secp256k1_fe_add(&r->y, &h3); +} + +static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, secp256k1_fe_t *rzr) { + /* 8 mul, 3 sqr, 4 normalize, 12 mul_int/add/negate */ + secp256k1_fe_t z12, u1, u2, s1, s2, h, i, i2, h2, h3, t; + if (a->infinity) { + VERIFY_CHECK(rzr == NULL); + secp256k1_gej_set_ge(r, b); + return; + } + if (b->infinity) { + if (rzr) { + secp256k1_fe_set_int(rzr, 1); + } + *r = *a; + return; + } + r->infinity = 0; + + secp256k1_fe_sqr(&z12, &a->z); + u1 = a->x; secp256k1_fe_normalize_weak(&u1); + secp256k1_fe_mul(&u2, &b->x, &z12); + s1 = a->y; secp256k1_fe_normalize_weak(&s1); + secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); + secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); + secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); + if (secp256k1_fe_normalizes_to_zero_var(&h)) { + if (secp256k1_fe_normalizes_to_zero_var(&i)) { + secp256k1_gej_double_var(r, a, rzr); + } else { + if (rzr) { + secp256k1_fe_set_int(rzr, 0); + } + r->infinity = 1; + } + return; + } + secp256k1_fe_sqr(&i2, &i); + secp256k1_fe_sqr(&h2, &h); + secp256k1_fe_mul(&h3, &h, &h2); + if (rzr) { + *rzr = h; + } + secp256k1_fe_mul(&r->z, &a->z, &h); + secp256k1_fe_mul(&t, &u1, &h2); + r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2); + secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i); + secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1); + secp256k1_fe_add(&r->y, &h3); +} + +static void secp256k1_gej_add_zinv_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, const secp256k1_fe_t *bzinv) { + /* 9 mul, 3 sqr, 4 normalize, 12 mul_int/add/negate */ + secp256k1_fe_t az, z12, u1, u2, s1, s2, h, i, i2, h2, h3, t; + + if (b->infinity) { + *r = *a; + return; + } + if (a->infinity) { + secp256k1_fe_t bzinv2, bzinv3; + r->infinity = b->infinity; + secp256k1_fe_sqr(&bzinv2, bzinv); + secp256k1_fe_mul(&bzinv3, &bzinv2, bzinv); + secp256k1_fe_mul(&r->x, &b->x, &bzinv2); + secp256k1_fe_mul(&r->y, &b->y, &bzinv3); + secp256k1_fe_set_int(&r->z, 1); + return; + } + r->infinity = 0; + + /** We need to calculate (rx,ry,rz) = (ax,ay,az) + (bx,by,1/bzinv). 
Due to + * secp256k1's isomorphism we can multiply the Z coordinates on both sides + * by bzinv, and get: (rx,ry,rz*bzinv) = (ax,ay,az*bzinv) + (bx,by,1). + * This means that (rx,ry,rz) can be calculated as + * (ax,ay,az*bzinv) + (bx,by,1), when not applying the bzinv factor to rz. + * The variable az below holds the modified Z coordinate for a, which is used + * for the computation of rx and ry, but not for rz. + */ + secp256k1_fe_mul(&az, &a->z, bzinv); + + secp256k1_fe_sqr(&z12, &az); + u1 = a->x; secp256k1_fe_normalize_weak(&u1); + secp256k1_fe_mul(&u2, &b->x, &z12); + s1 = a->y; secp256k1_fe_normalize_weak(&s1); + secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &az); + secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); + secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); + if (secp256k1_fe_normalizes_to_zero_var(&h)) { + if (secp256k1_fe_normalizes_to_zero_var(&i)) { + secp256k1_gej_double_var(r, a, NULL); + } else { + r->infinity = 1; + } + return; + } + secp256k1_fe_sqr(&i2, &i); + secp256k1_fe_sqr(&h2, &h); + secp256k1_fe_mul(&h3, &h, &h2); + r->z = a->z; secp256k1_fe_mul(&r->z, &r->z, &h); + secp256k1_fe_mul(&t, &u1, &h2); + r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2); + secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i); + secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1); + secp256k1_fe_add(&r->y, &h3); +} + + +static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) { + /* Operations: 7 mul, 5 sqr, 4 normalize, 21 mul_int/add/negate/cmov */ + static const secp256k1_fe_t fe_1 = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1); + secp256k1_fe_t zz, u1, u2, s1, s2, t, tt, m, n, q, rr; + secp256k1_fe_t m_alt, rr_alt; + int infinity, degenerate; + VERIFY_CHECK(!b->infinity); + VERIFY_CHECK(a->infinity == 0 || a->infinity == 1); + + /** In: + * Eric Brier and Marc Joye, Weierstrass Elliptic Curves and Side-Channel Attacks. + * In D. Naccache and P. Paillier, Eds., Public Key Cryptography, vol. 2274 of Lecture Notes in Computer Science, pages 335-345. Springer-Verlag, 2002. + * we find as solution for a unified addition/doubling formula: + * lambda = ((x1 + x2)^2 - x1 * x2 + a) / (y1 + y2), with a = 0 for secp256k1's curve equation. + * x3 = lambda^2 - (x1 + x2) + * 2*y3 = lambda * (x1 + x2 - 2 * x3) - (y1 + y2). + * + * Substituting x_i = Xi / Zi^2 and yi = Yi / Zi^3, for i=1,2,3, gives: + * U1 = X1*Z2^2, U2 = X2*Z1^2 + * S1 = Y1*Z2^3, S2 = Y2*Z1^3 + * Z = Z1*Z2 + * T = U1+U2 + * M = S1+S2 + * Q = T*M^2 + * R = T^2-U1*U2 + * X3 = 4*(R^2-Q) + * Y3 = 4*(R*(3*Q-2*R^2)-M^4) + * Z3 = 2*M*Z + * (Note that the paper uses xi = Xi / Zi and yi = Yi / Zi instead.) + * + * This formula has the benefit of being the same for both addition + * of distinct points and doubling. However, it breaks down in the + * case that either point is infinity, or that y1 = -y2. We handle + * these cases in the following ways: + * + * - If b is infinity we simply bail by means of a VERIFY_CHECK. + * + * - If a is infinity, we detect this, and at the end of the + * computation replace the result (which will be meaningless, + * but we compute to be constant-time) with b.x : b.y : 1. + * + * - If a = -b, we have y1 = -y2, which is a degenerate case. 
+ * But here the answer is infinity, so we simply set the + * infinity flag of the result, overriding the computed values + * without even needing to cmov. + * + * - If y1 = -y2 but x1 != x2, which does occur thanks to certain + * properties of our curve (specifically, 1 has nontrivial cube + * roots in our field, and the curve equation has no x coefficient) + * then the answer is not infinity but also not given by the above + * equation. In this case, we cmov in place an alternate expression + * for lambda. Specifically (y1 - y2)/(x1 - x2). Where both these + * expressions for lambda are defined, they are equal, and can be + * obtained from each other by multiplication by (y1 + y2)/(y1 + y2) + * then substitution of x^3 + 7 for y^2 (using the curve equation). + * For all pairs of nonzero points (a, b) at least one is defined, + * so this covers everything. + */ + + secp256k1_fe_sqr(&zz, &a->z); /* z = Z1^2 */ + u1 = a->x; secp256k1_fe_normalize_weak(&u1); /* u1 = U1 = X1*Z2^2 (1) */ + secp256k1_fe_mul(&u2, &b->x, &zz); /* u2 = U2 = X2*Z1^2 (1) */ + s1 = a->y; secp256k1_fe_normalize_weak(&s1); /* s1 = S1 = Y1*Z2^3 (1) */ + secp256k1_fe_mul(&s2, &b->y, &zz); /* s2 = Y2*Z2^2 (1) */ + secp256k1_fe_mul(&s2, &s2, &a->z); /* s2 = S2 = Y2*Z1^3 (1) */ + t = u1; secp256k1_fe_add(&t, &u2); /* t = T = U1+U2 (2) */ + m = s1; secp256k1_fe_add(&m, &s2); /* m = M = S1+S2 (2) */ + secp256k1_fe_sqr(&rr, &t); /* rr = T^2 (1) */ + secp256k1_fe_negate(&m_alt, &u2, 1); /* Malt = -X2*Z1^2 */ + secp256k1_fe_mul(&tt, &u1, &m_alt); /* tt = -U1*U2 (2) */ + secp256k1_fe_add(&rr, &tt); /* rr = R = T^2-U1*U2 (3) */ + /** If lambda = R/M = 0/0 we have a problem (except in the "trivial" + * case that Z = z1z2 = 0, and this is special-cased later on). */ + degenerate = secp256k1_fe_normalizes_to_zero(&m) & + secp256k1_fe_normalizes_to_zero(&rr); + /* This only occurs when y1 == -y2 and x1^3 == x2^3, but x1 != x2. + * This means either x1 == beta*x2 or beta*x1 == x2, where beta is + * a nontrivial cube root of one. In either case, an alternate + * non-indeterminate expression for lambda is (y1 - y2)/(x1 - x2), + * so we set R/M equal to this. */ + rr_alt = s1; + secp256k1_fe_mul_int(&rr_alt, 2); /* rr = Y1*Z2^3 - Y2*Z1^3 (2) */ + secp256k1_fe_add(&m_alt, &u1); /* Malt = X1*Z2^2 - X2*Z1^2 */ + + secp256k1_fe_cmov(&rr_alt, &rr, !degenerate); + secp256k1_fe_cmov(&m_alt, &m, !degenerate); + /* Now Ralt / Malt = lambda and is guaranteed not to be 0/0. + * From here on out Ralt and Malt represent the numerator + * and denominator of lambda; R and M represent the explicit + * expressions x1^2 + x2^2 + x1x2 and y1 + y2. */ + secp256k1_fe_sqr(&n, &m_alt); /* n = Malt^2 (1) */ + secp256k1_fe_mul(&q, &n, &t); /* q = Q = T*Malt^2 (1) */ + /* These two lines use the observation that either M == Malt or M == 0, + * so M^3 * Malt is either Malt^4 (which is computed by squaring), or + * zero (which is "computed" by cmov). So the cost is one squaring + * versus two multiplications. 
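+ * (In the degenerate case m already normalizes to zero, so cmov-ing m into n directly yields
+ * a valid representation of M^3 * Malt = 0.)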
*/ + secp256k1_fe_sqr(&n, &n); + secp256k1_fe_cmov(&n, &m, degenerate); /* n = M^3 * Malt (2) */ + secp256k1_fe_sqr(&t, &rr_alt); /* t = Ralt^2 (1) */ + secp256k1_fe_mul(&r->z, &a->z, &m_alt); /* r->z = Malt*Z (1) */ + infinity = secp256k1_fe_normalizes_to_zero(&r->z) * (1 - a->infinity); + secp256k1_fe_mul_int(&r->z, 2); /* r->z = Z3 = 2*Malt*Z (2) */ + secp256k1_fe_negate(&q, &q, 1); /* q = -Q (2) */ + secp256k1_fe_add(&t, &q); /* t = Ralt^2-Q (3) */ + secp256k1_fe_normalize_weak(&t); + r->x = t; /* r->x = Ralt^2-Q (1) */ + secp256k1_fe_mul_int(&t, 2); /* t = 2*x3 (2) */ + secp256k1_fe_add(&t, &q); /* t = 2*x3 - Q: (4) */ + secp256k1_fe_mul(&t, &t, &rr_alt); /* t = Ralt*(2*x3 - Q) (1) */ + secp256k1_fe_add(&t, &n); /* t = Ralt*(2*x3 - Q) + M^3*Malt (3) */ + secp256k1_fe_negate(&r->y, &t, 3); /* r->y = Ralt*(Q - 2x3) - M^3*Malt (4) */ + secp256k1_fe_normalize_weak(&r->y); + secp256k1_fe_mul_int(&r->x, 4); /* r->x = X3 = 4*(Ralt^2-Q) */ + secp256k1_fe_mul_int(&r->y, 4); /* r->y = Y3 = 4*Ralt*(Q - 2x3) - 4*M^3*Malt (4) */ + + /** In case a->infinity == 1, replace r with (b->x, b->y, 1). */ + secp256k1_fe_cmov(&r->x, &b->x, a->infinity); + secp256k1_fe_cmov(&r->y, &b->y, a->infinity); + secp256k1_fe_cmov(&r->z, &fe_1, a->infinity); + r->infinity = infinity; +} + +static void secp256k1_gej_rescale(secp256k1_gej_t *r, const secp256k1_fe_t *s) { + /* Operations: 4 mul, 1 sqr */ + secp256k1_fe_t zz; + VERIFY_CHECK(!secp256k1_fe_is_zero(s)); + secp256k1_fe_sqr(&zz, s); + secp256k1_fe_mul(&r->x, &r->x, &zz); /* r->x *= s^2 */ + secp256k1_fe_mul(&r->y, &r->y, &zz); + secp256k1_fe_mul(&r->y, &r->y, s); /* r->y *= s^3 */ + secp256k1_fe_mul(&r->z, &r->z, s); /* r->z *= s */ +} + +static void secp256k1_ge_to_storage(secp256k1_ge_storage_t *r, const secp256k1_ge_t *a) { + secp256k1_fe_t x, y; + VERIFY_CHECK(!a->infinity); + x = a->x; + secp256k1_fe_normalize(&x); + y = a->y; + secp256k1_fe_normalize(&y); + secp256k1_fe_to_storage(&r->x, &x); + secp256k1_fe_to_storage(&r->y, &y); +} + +static void secp256k1_ge_from_storage(secp256k1_ge_t *r, const secp256k1_ge_storage_t *a) { + secp256k1_fe_from_storage(&r->x, &a->x); + secp256k1_fe_from_storage(&r->y, &a->y); + r->infinity = 0; +} + +static SECP256K1_INLINE void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r, const secp256k1_ge_storage_t *a, int flag) { + secp256k1_fe_storage_cmov(&r->x, &a->x, flag); + secp256k1_fe_storage_cmov(&r->y, &a->y, flag); +} + +#ifdef USE_ENDOMORPHISM +static void secp256k1_ge_mul_lambda(secp256k1_ge_t *r, const secp256k1_ge_t *a) { + static const secp256k1_fe_t beta = SECP256K1_FE_CONST( + 0x7ae96a2bul, 0x657c0710ul, 0x6e64479eul, 0xac3434e9ul, + 0x9cf04975ul, 0x12f58995ul, 0xc1396c28ul, 0x719501eeul + ); + *r = *a; + secp256k1_fe_mul(&r->x, &r->x, &beta); +} +#endif + +#endif diff --git a/bf/secp256k1/src/hash.h b/bf/secp256k1/src/hash.h new file mode 100644 index 0000000..0ff01e6 --- /dev/null +++ b/bf/secp256k1/src/hash.h @@ -0,0 +1,41 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_HASH_ +#define _SECP256K1_HASH_ + +#include +#include + +typedef struct { + uint32_t s[32]; + uint32_t buf[16]; /* In big endian */ + size_t bytes; +} secp256k1_sha256_t; + +static void secp256k1_sha256_initialize(secp256k1_sha256_t 
*hash); +static void secp256k1_sha256_write(secp256k1_sha256_t *hash, const unsigned char *data, size_t size); +static void secp256k1_sha256_finalize(secp256k1_sha256_t *hash, unsigned char *out32); + +typedef struct { + secp256k1_sha256_t inner, outer; +} secp256k1_hmac_sha256_t; + +static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, const unsigned char *key, size_t size); +static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256_t *hash, const unsigned char *data, size_t size); +static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256_t *hash, unsigned char *out32); + +typedef struct { + unsigned char v[32]; + unsigned char k[32]; + int retry; +} secp256k1_rfc6979_hmac_sha256_t; + +static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen); +static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256_t *rng, unsigned char *out, size_t outlen); +static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256_t *rng); + +#endif diff --git a/bf/secp256k1/src/hash_impl.h b/bf/secp256k1/src/hash_impl.h new file mode 100644 index 0000000..ae55df6 --- /dev/null +++ b/bf/secp256k1/src/hash_impl.h @@ -0,0 +1,283 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_HASH_IMPL_H_ +#define _SECP256K1_HASH_IMPL_H_ + +#include "hash.h" + +#include +#include +#include + +#define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) +#define Maj(x,y,z) (((x) & (y)) | ((z) & ((x) | (y)))) +#define Sigma0(x) (((x) >> 2 | (x) << 30) ^ ((x) >> 13 | (x) << 19) ^ ((x) >> 22 | (x) << 10)) +#define Sigma1(x) (((x) >> 6 | (x) << 26) ^ ((x) >> 11 | (x) << 21) ^ ((x) >> 25 | (x) << 7)) +#define sigma0(x) (((x) >> 7 | (x) << 25) ^ ((x) >> 18 | (x) << 14) ^ ((x) >> 3)) +#define sigma1(x) (((x) >> 17 | (x) << 15) ^ ((x) >> 19 | (x) << 13) ^ ((x) >> 10)) + +#define Round(a,b,c,d,e,f,g,h,k,w) do { \ + uint32_t t1 = (h) + Sigma1(e) + Ch((e), (f), (g)) + (k) + (w); \ + uint32_t t2 = Sigma0(a) + Maj((a), (b), (c)); \ + (d) += t1; \ + (h) = t1 + t2; \ +} while(0) + +#ifdef WORDS_BIGENDIAN +#define BE32(x) (x) +#else +#define BE32(p) ((((p) & 0xFF) << 24) | (((p) & 0xFF00) << 8) | (((p) & 0xFF0000) >> 8) | (((p) & 0xFF000000) >> 24)) +#endif + +static void secp256k1_sha256_initialize(secp256k1_sha256_t *hash) { + hash->s[0] = 0x6a09e667ul; + hash->s[1] = 0xbb67ae85ul; + hash->s[2] = 0x3c6ef372ul; + hash->s[3] = 0xa54ff53aul; + hash->s[4] = 0x510e527ful; + hash->s[5] = 0x9b05688cul; + hash->s[6] = 0x1f83d9abul; + hash->s[7] = 0x5be0cd19ul; + hash->bytes = 0; +} + +/** Perform one SHA-256 transformation, processing 16 big endian 32-bit words. 
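+ * This is the standard SHA-256 compression function (FIPS 180-4): all 64 rounds are unrolled,
+ * the message schedule w0..w15 is expanded in place, and the working variables a..h are rotated
+ * by permuting the Round() arguments rather than by moving data.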
*/ +static void secp256k1_sha256_transform(uint32_t* s, const uint32_t* chunk) { + uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7]; + uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; + + Round(a, b, c, d, e, f, g, h, 0x428a2f98, w0 = BE32(chunk[0])); + Round(h, a, b, c, d, e, f, g, 0x71374491, w1 = BE32(chunk[1])); + Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w2 = BE32(chunk[2])); + Round(f, g, h, a, b, c, d, e, 0xe9b5dba5, w3 = BE32(chunk[3])); + Round(e, f, g, h, a, b, c, d, 0x3956c25b, w4 = BE32(chunk[4])); + Round(d, e, f, g, h, a, b, c, 0x59f111f1, w5 = BE32(chunk[5])); + Round(c, d, e, f, g, h, a, b, 0x923f82a4, w6 = BE32(chunk[6])); + Round(b, c, d, e, f, g, h, a, 0xab1c5ed5, w7 = BE32(chunk[7])); + Round(a, b, c, d, e, f, g, h, 0xd807aa98, w8 = BE32(chunk[8])); + Round(h, a, b, c, d, e, f, g, 0x12835b01, w9 = BE32(chunk[9])); + Round(g, h, a, b, c, d, e, f, 0x243185be, w10 = BE32(chunk[10])); + Round(f, g, h, a, b, c, d, e, 0x550c7dc3, w11 = BE32(chunk[11])); + Round(e, f, g, h, a, b, c, d, 0x72be5d74, w12 = BE32(chunk[12])); + Round(d, e, f, g, h, a, b, c, 0x80deb1fe, w13 = BE32(chunk[13])); + Round(c, d, e, f, g, h, a, b, 0x9bdc06a7, w14 = BE32(chunk[14])); + Round(b, c, d, e, f, g, h, a, 0xc19bf174, w15 = BE32(chunk[15])); + + Round(a, b, c, d, e, f, g, h, 0xe49b69c1, w0 += sigma1(w14) + w9 + sigma0(w1)); + Round(h, a, b, c, d, e, f, g, 0xefbe4786, w1 += sigma1(w15) + w10 + sigma0(w2)); + Round(g, h, a, b, c, d, e, f, 0x0fc19dc6, w2 += sigma1(w0) + w11 + sigma0(w3)); + Round(f, g, h, a, b, c, d, e, 0x240ca1cc, w3 += sigma1(w1) + w12 + sigma0(w4)); + Round(e, f, g, h, a, b, c, d, 0x2de92c6f, w4 += sigma1(w2) + w13 + sigma0(w5)); + Round(d, e, f, g, h, a, b, c, 0x4a7484aa, w5 += sigma1(w3) + w14 + sigma0(w6)); + Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc, w6 += sigma1(w4) + w15 + sigma0(w7)); + Round(b, c, d, e, f, g, h, a, 0x76f988da, w7 += sigma1(w5) + w0 + sigma0(w8)); + Round(a, b, c, d, e, f, g, h, 0x983e5152, w8 += sigma1(w6) + w1 + sigma0(w9)); + Round(h, a, b, c, d, e, f, g, 0xa831c66d, w9 += sigma1(w7) + w2 + sigma0(w10)); + Round(g, h, a, b, c, d, e, f, 0xb00327c8, w10 += sigma1(w8) + w3 + sigma0(w11)); + Round(f, g, h, a, b, c, d, e, 0xbf597fc7, w11 += sigma1(w9) + w4 + sigma0(w12)); + Round(e, f, g, h, a, b, c, d, 0xc6e00bf3, w12 += sigma1(w10) + w5 + sigma0(w13)); + Round(d, e, f, g, h, a, b, c, 0xd5a79147, w13 += sigma1(w11) + w6 + sigma0(w14)); + Round(c, d, e, f, g, h, a, b, 0x06ca6351, w14 += sigma1(w12) + w7 + sigma0(w15)); + Round(b, c, d, e, f, g, h, a, 0x14292967, w15 += sigma1(w13) + w8 + sigma0(w0)); + + Round(a, b, c, d, e, f, g, h, 0x27b70a85, w0 += sigma1(w14) + w9 + sigma0(w1)); + Round(h, a, b, c, d, e, f, g, 0x2e1b2138, w1 += sigma1(w15) + w10 + sigma0(w2)); + Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc, w2 += sigma1(w0) + w11 + sigma0(w3)); + Round(f, g, h, a, b, c, d, e, 0x53380d13, w3 += sigma1(w1) + w12 + sigma0(w4)); + Round(e, f, g, h, a, b, c, d, 0x650a7354, w4 += sigma1(w2) + w13 + sigma0(w5)); + Round(d, e, f, g, h, a, b, c, 0x766a0abb, w5 += sigma1(w3) + w14 + sigma0(w6)); + Round(c, d, e, f, g, h, a, b, 0x81c2c92e, w6 += sigma1(w4) + w15 + sigma0(w7)); + Round(b, c, d, e, f, g, h, a, 0x92722c85, w7 += sigma1(w5) + w0 + sigma0(w8)); + Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1, w8 += sigma1(w6) + w1 + sigma0(w9)); + Round(h, a, b, c, d, e, f, g, 0xa81a664b, w9 += sigma1(w7) + w2 + sigma0(w10)); + Round(g, h, a, b, c, d, e, f, 0xc24b8b70, w10 += sigma1(w8) + w3 + sigma0(w11)); + 
Round(f, g, h, a, b, c, d, e, 0xc76c51a3, w11 += sigma1(w9) + w4 + sigma0(w12)); + Round(e, f, g, h, a, b, c, d, 0xd192e819, w12 += sigma1(w10) + w5 + sigma0(w13)); + Round(d, e, f, g, h, a, b, c, 0xd6990624, w13 += sigma1(w11) + w6 + sigma0(w14)); + Round(c, d, e, f, g, h, a, b, 0xf40e3585, w14 += sigma1(w12) + w7 + sigma0(w15)); + Round(b, c, d, e, f, g, h, a, 0x106aa070, w15 += sigma1(w13) + w8 + sigma0(w0)); + + Round(a, b, c, d, e, f, g, h, 0x19a4c116, w0 += sigma1(w14) + w9 + sigma0(w1)); + Round(h, a, b, c, d, e, f, g, 0x1e376c08, w1 += sigma1(w15) + w10 + sigma0(w2)); + Round(g, h, a, b, c, d, e, f, 0x2748774c, w2 += sigma1(w0) + w11 + sigma0(w3)); + Round(f, g, h, a, b, c, d, e, 0x34b0bcb5, w3 += sigma1(w1) + w12 + sigma0(w4)); + Round(e, f, g, h, a, b, c, d, 0x391c0cb3, w4 += sigma1(w2) + w13 + sigma0(w5)); + Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a, w5 += sigma1(w3) + w14 + sigma0(w6)); + Round(c, d, e, f, g, h, a, b, 0x5b9cca4f, w6 += sigma1(w4) + w15 + sigma0(w7)); + Round(b, c, d, e, f, g, h, a, 0x682e6ff3, w7 += sigma1(w5) + w0 + sigma0(w8)); + Round(a, b, c, d, e, f, g, h, 0x748f82ee, w8 += sigma1(w6) + w1 + sigma0(w9)); + Round(h, a, b, c, d, e, f, g, 0x78a5636f, w9 += sigma1(w7) + w2 + sigma0(w10)); + Round(g, h, a, b, c, d, e, f, 0x84c87814, w10 += sigma1(w8) + w3 + sigma0(w11)); + Round(f, g, h, a, b, c, d, e, 0x8cc70208, w11 += sigma1(w9) + w4 + sigma0(w12)); + Round(e, f, g, h, a, b, c, d, 0x90befffa, w12 += sigma1(w10) + w5 + sigma0(w13)); + Round(d, e, f, g, h, a, b, c, 0xa4506ceb, w13 += sigma1(w11) + w6 + sigma0(w14)); + Round(c, d, e, f, g, h, a, b, 0xbef9a3f7, w14 + sigma1(w12) + w7 + sigma0(w15)); + Round(b, c, d, e, f, g, h, a, 0xc67178f2, w15 + sigma1(w13) + w8 + sigma0(w0)); + + s[0] += a; + s[1] += b; + s[2] += c; + s[3] += d; + s[4] += e; + s[5] += f; + s[6] += g; + s[7] += h; +} + +static void secp256k1_sha256_write(secp256k1_sha256_t *hash, const unsigned char *data, size_t len) { + size_t bufsize = hash->bytes & 0x3F; + hash->bytes += len; + while (bufsize + len >= 64) { + /* Fill the buffer, and process it. */ + memcpy(((unsigned char*)hash->buf) + bufsize, data, 64 - bufsize); + data += 64 - bufsize; + len -= 64 - bufsize; + secp256k1_sha256_transform(hash->s, hash->buf); + bufsize = 0; + } + if (len) { + /* Fill the buffer with what remains. 
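+ * At this point bufsize + len < 64, so the leftover bytes fit in the buffer and will be
+ * processed by a subsequent write or by finalize.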
*/ + memcpy(((unsigned char*)hash->buf) + bufsize, data, len); + } +} + +static void secp256k1_sha256_finalize(secp256k1_sha256_t *hash, unsigned char *out32) { + static const unsigned char pad[64] = {0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + uint32_t sizedesc[2]; + uint32_t out[8]; + int i = 0; + sizedesc[0] = BE32(hash->bytes >> 29); + sizedesc[1] = BE32(hash->bytes << 3); + secp256k1_sha256_write(hash, pad, 1 + ((119 - (hash->bytes % 64)) % 64)); + secp256k1_sha256_write(hash, (const unsigned char*)sizedesc, 8); + for (i = 0; i < 8; i++) { + out[i] = BE32(hash->s[i]); + hash->s[i] = 0; + } + memcpy(out32, (const unsigned char*)out, 32); +} + +static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, const unsigned char *key, size_t keylen) { + int n; + unsigned char rkey[64]; + if (keylen <= 64) { + memcpy(rkey, key, keylen); + memset(rkey + keylen, 0, 64 - keylen); + } else { + secp256k1_sha256_t sha256; + secp256k1_sha256_initialize(&sha256); + secp256k1_sha256_write(&sha256, key, keylen); + secp256k1_sha256_finalize(&sha256, rkey); + memset(rkey + 32, 0, 32); + } + + secp256k1_sha256_initialize(&hash->outer); + for (n = 0; n < 64; n++) { + rkey[n] ^= 0x5c; + } + secp256k1_sha256_write(&hash->outer, rkey, 64); + + secp256k1_sha256_initialize(&hash->inner); + for (n = 0; n < 64; n++) { + rkey[n] ^= 0x5c ^ 0x36; + } + secp256k1_sha256_write(&hash->inner, rkey, 64); + memset(rkey, 0, 64); +} + +static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256_t *hash, const unsigned char *data, size_t size) { + secp256k1_sha256_write(&hash->inner, data, size); +} + +static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256_t *hash, unsigned char *out32) { + unsigned char temp[32]; + secp256k1_sha256_finalize(&hash->inner, temp); + secp256k1_sha256_write(&hash->outer, temp, 32); + memset(temp, 0, 32); + secp256k1_sha256_finalize(&hash->outer, out32); +} + + +static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen) { + secp256k1_hmac_sha256_t hmac; + static const unsigned char zero[1] = {0x00}; + static const unsigned char one[1] = {0x01}; + + memset(rng->v, 0x01, 32); /* RFC6979 3.2.b. */ + memset(rng->k, 0x00, 32); /* RFC6979 3.2.c. */ + + /* RFC6979 3.2.d. */ + secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32); + secp256k1_hmac_sha256_write(&hmac, rng->v, 32); + secp256k1_hmac_sha256_write(&hmac, zero, 1); + secp256k1_hmac_sha256_write(&hmac, key, keylen); + secp256k1_hmac_sha256_finalize(&hmac, rng->k); + secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32); + secp256k1_hmac_sha256_write(&hmac, rng->v, 32); + secp256k1_hmac_sha256_finalize(&hmac, rng->v); + + /* RFC6979 3.2.f. */ + secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32); + secp256k1_hmac_sha256_write(&hmac, rng->v, 32); + secp256k1_hmac_sha256_write(&hmac, one, 1); + secp256k1_hmac_sha256_write(&hmac, key, keylen); + secp256k1_hmac_sha256_finalize(&hmac, rng->k); + secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32); + secp256k1_hmac_sha256_write(&hmac, rng->v, 32); + secp256k1_hmac_sha256_finalize(&hmac, rng->v); + rng->retry = 0; +} + +static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256_t *rng, unsigned char *out, size_t outlen) { + /* RFC6979 3.2.h. 
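+ * Each iteration below computes V = HMAC_K(V) and emits up to 32 bytes of V; the re-keying
+ * step K = HMAC_K(V || 0x00), V = HMAC_K(V) is deferred to the start of the next call via
+ * the retry flag.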
*/ + static const unsigned char zero[1] = {0x00}; + if (rng->retry) { + secp256k1_hmac_sha256_t hmac; + secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32); + secp256k1_hmac_sha256_write(&hmac, rng->v, 32); + secp256k1_hmac_sha256_write(&hmac, zero, 1); + secp256k1_hmac_sha256_finalize(&hmac, rng->k); + secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32); + secp256k1_hmac_sha256_write(&hmac, rng->v, 32); + secp256k1_hmac_sha256_finalize(&hmac, rng->v); + } + + while (outlen > 0) { + secp256k1_hmac_sha256_t hmac; + int now = outlen; + secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32); + secp256k1_hmac_sha256_write(&hmac, rng->v, 32); + secp256k1_hmac_sha256_finalize(&hmac, rng->v); + if (now > 32) { + now = 32; + } + memcpy(out, rng->v, now); + out += now; + outlen -= now; + } + + rng->retry = 1; +} + +static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256_t *rng) { + memset(rng->k, 0, 32); + memset(rng->v, 0, 32); + rng->retry = 0; +} + + +#undef Round +#undef sigma0 +#undef sigma1 +#undef Sigma0 +#undef Sigma1 +#undef Ch +#undef Maj +#undef ReadBE32 +#undef WriteBE32 + +#endif diff --git a/bf/secp256k1/src/java/org/bitcoin/NativeSecp256k1.java b/bf/secp256k1/src/java/org/bitcoin/NativeSecp256k1.java new file mode 100644 index 0000000..90a498e --- /dev/null +++ b/bf/secp256k1/src/java/org/bitcoin/NativeSecp256k1.java @@ -0,0 +1,60 @@ +package org.bitcoin; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import com.google.common.base.Preconditions; + + +/** + * This class holds native methods to handle ECDSA verification. + * You can find an example library that can be used for this at + * https://github.com/sipa/secp256k1 + */ +public class NativeSecp256k1 { + public static final boolean enabled; + static { + boolean isEnabled = true; + try { + System.loadLibrary("javasecp256k1"); + } catch (UnsatisfiedLinkError e) { + isEnabled = false; + } + enabled = isEnabled; + } + + private static ThreadLocal nativeECDSABuffer = new ThreadLocal(); + /** + * Verifies the given secp256k1 signature in native code. 
+ * Calling when enabled == false is undefined (probably library not loaded) + * + * @param data The data which was signed, must be exactly 32 bytes + * @param signature The signature + * @param pub The public key which did the signing + */ + public static boolean verify(byte[] data, byte[] signature, byte[] pub) { + Preconditions.checkArgument(data.length == 32 && signature.length <= 520 && pub.length <= 520); + + ByteBuffer byteBuff = nativeECDSABuffer.get(); + if (byteBuff == null) { + byteBuff = ByteBuffer.allocateDirect(32 + 8 + 520 + 520); + byteBuff.order(ByteOrder.nativeOrder()); + nativeECDSABuffer.set(byteBuff); + } + byteBuff.rewind(); + byteBuff.put(data); + byteBuff.putInt(signature.length); + byteBuff.putInt(pub.length); + byteBuff.put(signature); + byteBuff.put(pub); + return secp256k1_ecdsa_verify(byteBuff) == 1; + } + + /** + * @param byteBuff signature format is byte[32] data, + * native-endian int signatureLength, native-endian int pubkeyLength, + * byte[signatureLength] signature, byte[pubkeyLength] pub + * @returns 1 for valid signature, anything else for invalid + */ + private static native int secp256k1_ecdsa_verify(ByteBuffer byteBuff); +} diff --git a/bf/secp256k1/src/java/org_bitcoin_NativeSecp256k1.c b/bf/secp256k1/src/java/org_bitcoin_NativeSecp256k1.c new file mode 100644 index 0000000..bb4cd70 --- /dev/null +++ b/bf/secp256k1/src/java/org_bitcoin_NativeSecp256k1.c @@ -0,0 +1,23 @@ +#include "org_bitcoin_NativeSecp256k1.h" +#include "include/secp256k1.h" + +JNIEXPORT jint JNICALL Java_org_bitcoin_NativeSecp256k1_secp256k1_1ecdsa_1verify + (JNIEnv* env, jclass classObject, jobject byteBufferObject) +{ + unsigned char* data = (unsigned char*) (*env)->GetDirectBufferAddress(env, byteBufferObject); + int sigLen = *((int*)(data + 32)); + int pubLen = *((int*)(data + 32 + 4)); + + return secp256k1_ecdsa_verify(data, 32, data+32+8, sigLen, data+32+8+sigLen, pubLen); +} + +static void __javasecp256k1_attach(void) __attribute__((constructor)); +static void __javasecp256k1_detach(void) __attribute__((destructor)); + +static void __javasecp256k1_attach(void) { + secp256k1_start(SECP256K1_START_VERIFY); +} + +static void __javasecp256k1_detach(void) { + secp256k1_stop(); +} diff --git a/bf/secp256k1/src/java/org_bitcoin_NativeSecp256k1.h b/bf/secp256k1/src/java/org_bitcoin_NativeSecp256k1.h new file mode 100644 index 0000000..d7fb004 --- /dev/null +++ b/bf/secp256k1/src/java/org_bitcoin_NativeSecp256k1.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_bitcoin_NativeSecp256k1 */ + +#ifndef _Included_org_bitcoin_NativeSecp256k1 +#define _Included_org_bitcoin_NativeSecp256k1 +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_bitcoin_NativeSecp256k1 + * Method: secp256k1_ecdsa_verify + * Signature: (Ljava/nio/ByteBuffer;)I + */ +JNIEXPORT jint JNICALL Java_org_bitcoin_NativeSecp256k1_secp256k1_1ecdsa_1verify + (JNIEnv *, jclass, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/bf/secp256k1/src/modules/ecdh/Makefile.am.include b/bf/secp256k1/src/modules/ecdh/Makefile.am.include new file mode 100644 index 0000000..8ef3aff --- /dev/null +++ b/bf/secp256k1/src/modules/ecdh/Makefile.am.include @@ -0,0 +1,9 @@ +include_HEADERS += include/secp256k1_ecdh.h +noinst_HEADERS += src/modules/ecdh/main_impl.h +noinst_HEADERS += src/modules/ecdh/tests_impl.h +if USE_BENCHMARK +noinst_PROGRAMS += bench_ecdh +bench_ecdh_SOURCES = src/bench_ecdh.c +bench_ecdh_LDADD = libsecp256k1.la $(SECP_LIBS) 
+bench_ecdh_LDFLAGS = -static +endif diff --git a/bf/secp256k1/src/modules/ecdh/main_impl.h b/bf/secp256k1/src/modules/ecdh/main_impl.h new file mode 100644 index 0000000..064cf6e --- /dev/null +++ b/bf/secp256k1/src/modules/ecdh/main_impl.h @@ -0,0 +1,53 @@ +/********************************************************************** + * Copyright (c) 2015 Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_MODULE_ECDH_MAIN_ +#define _SECP256K1_MODULE_ECDH_MAIN_ + +#include "ecmult_const_impl.h" + +int secp256k1_ecdh(const secp256k1_context_t* ctx, unsigned char *result, const secp256k1_pubkey_t *point, const unsigned char *scalar) { + int ret = 0; + int overflow = 0; + secp256k1_gej_t res; + secp256k1_ge_t pt; + secp256k1_scalar_t s; + ARG_CHECK(result != NULL); + ARG_CHECK(point != NULL); + ARG_CHECK(scalar != NULL); + (void)ctx; + + secp256k1_pubkey_load(ctx, &pt, point); + secp256k1_scalar_set_b32(&s, scalar, &overflow); + if (overflow || secp256k1_scalar_is_zero(&s)) { + ret = 0; + } else { + unsigned char x[32]; + unsigned char y[1]; + secp256k1_sha256_t sha; + + secp256k1_ecmult_const(&res, &pt, &s); + secp256k1_ge_set_gej(&pt, &res); + /* Compute a hash of the point in compressed form + * Note we cannot use secp256k1_eckey_pubkey_serialize here since it does not + * expect its output to be secret and has a timing sidechannel. */ + secp256k1_fe_normalize(&pt.x); + secp256k1_fe_normalize(&pt.y); + secp256k1_fe_get_b32(x, &pt.x); + y[0] = 0x02 | secp256k1_fe_is_odd(&pt.y); + + secp256k1_sha256_initialize(&sha); + secp256k1_sha256_write(&sha, y, sizeof(y)); + secp256k1_sha256_write(&sha, x, sizeof(x)); + secp256k1_sha256_finalize(&sha, result); + ret = 1; + } + + secp256k1_scalar_clear(&s); + return ret; +} + +#endif diff --git a/bf/secp256k1/src/modules/ecdh/tests_impl.h b/bf/secp256k1/src/modules/ecdh/tests_impl.h new file mode 100644 index 0000000..271eb28 --- /dev/null +++ b/bf/secp256k1/src/modules/ecdh/tests_impl.h @@ -0,0 +1,75 @@ +/********************************************************************** + * Copyright (c) 2015 Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_MODULE_ECDH_TESTS_ +#define _SECP256K1_MODULE_ECDH_TESTS_ + +void test_ecdh_generator_basepoint(void) { + unsigned char s_one[32] = { 0 }; + secp256k1_pubkey_t point[2]; + int i; + + s_one[31] = 1; + /* Check against pubkey creation when the basepoint is the generator */ + for (i = 0; i < 100; ++i) { + secp256k1_sha256_t sha; + unsigned char s_b32[32]; + unsigned char output_ecdh[32]; + unsigned char output_ser[32]; + unsigned char point_ser[33]; + int point_ser_len = sizeof(point_ser); + secp256k1_scalar_t s; + + random_scalar_order(&s); + secp256k1_scalar_get_b32(s_b32, &s); + + /* compute using ECDH function */ + CHECK(secp256k1_ec_pubkey_create(ctx, &point[0], s_one) == 1); + CHECK(secp256k1_ecdh(ctx, output_ecdh, &point[0], s_b32) == 1); + /* compute "explicitly" */ + CHECK(secp256k1_ec_pubkey_create(ctx, &point[1], s_b32) == 1); + CHECK(secp256k1_ec_pubkey_serialize(ctx, point_ser, &point_ser_len, &point[1], 1) == 1); + CHECK(point_ser_len == sizeof(point_ser)); + secp256k1_sha256_initialize(&sha); + 
secp256k1_sha256_write(&sha, point_ser, point_ser_len); + secp256k1_sha256_finalize(&sha, output_ser); + /* compare */ + CHECK(memcmp(output_ecdh, output_ser, sizeof(output_ser)) == 0); + } +} + +void test_bad_scalar(void) { + unsigned char s_zero[32] = { 0 }; + unsigned char s_overflow[32] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, + 0xba, 0xae, 0xdc, 0xe6, 0xaf, 0x48, 0xa0, 0x3b, + 0xbf, 0xd2, 0x5e, 0x8c, 0xd0, 0x36, 0x41, 0x41 + }; + unsigned char s_rand[32] = { 0 }; + unsigned char output[32]; + secp256k1_scalar_t rand; + secp256k1_pubkey_t point; + + /* Create random point */ + random_scalar_order(&rand); + secp256k1_scalar_get_b32(s_rand, &rand); + CHECK(secp256k1_ec_pubkey_create(ctx, &point, s_rand) == 1); + + /* Try to multiply it by bad values */ + CHECK(secp256k1_ecdh(ctx, output, &point, s_zero) == 0); + CHECK(secp256k1_ecdh(ctx, output, &point, s_overflow) == 0); + /* ...and a good one */ + s_overflow[31] -= 1; + CHECK(secp256k1_ecdh(ctx, output, &point, s_overflow) == 1); +} + +void run_ecdh_tests(void) { + test_ecdh_generator_basepoint(); + test_bad_scalar(); +} + +#endif diff --git a/bf/secp256k1/src/modules/schnorr/Makefile.am.include b/bf/secp256k1/src/modules/schnorr/Makefile.am.include new file mode 100644 index 0000000..bad4cb7 --- /dev/null +++ b/bf/secp256k1/src/modules/schnorr/Makefile.am.include @@ -0,0 +1,11 @@ +include_HEADERS += include/secp256k1_schnorr.h +noinst_HEADERS += src/modules/schnorr/main_impl.h +noinst_HEADERS += src/modules/schnorr/schnorr.h +noinst_HEADERS += src/modules/schnorr/schnorr_impl.h +noinst_HEADERS += src/modules/schnorr/tests_impl.h +if USE_BENCHMARK +noinst_PROGRAMS += bench_schnorr_verify +bench_schnorr_verify_SOURCES = src/bench_schnorr_verify.c +bench_schnorr_verify_LDADD = libsecp256k1.la $(SECP_LIBS) +bench_schnorr_verify_LDFLAGS = -static +endif diff --git a/bf/secp256k1/src/modules/schnorr/main_impl.h b/bf/secp256k1/src/modules/schnorr/main_impl.h new file mode 100644 index 0000000..f5250bf --- /dev/null +++ b/bf/secp256k1/src/modules/schnorr/main_impl.h @@ -0,0 +1,163 @@ +/********************************************************************** + * Copyright (c) 2014-2015 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef SECP256K1_MODULE_SCHNORR_MAIN +#define SECP256K1_MODULE_SCHNORR_MAIN + +#include "modules/schnorr/schnorr_impl.h" + +static void secp256k1_schnorr_msghash_sha256(unsigned char *h32, const unsigned char *r32, const unsigned char *msg32) { + secp256k1_sha256_t sha; + secp256k1_sha256_initialize(&sha); + secp256k1_sha256_write(&sha, r32, 32); + secp256k1_sha256_write(&sha, msg32, 32); + secp256k1_sha256_finalize(&sha, h32); +} + +static const unsigned char secp256k1_schnorr_algo16[16] = "Schnorr+SHA256 "; + +int secp256k1_schnorr_sign(const secp256k1_context_t* ctx, const unsigned char *msg32, unsigned char *sig64, const unsigned char *seckey, secp256k1_nonce_function_t noncefp, const void* noncedata) { + secp256k1_scalar_t sec, non; + int ret = 0; + int overflow = 0; + unsigned int count = 0; + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)); + ARG_CHECK(msg32 != NULL); + ARG_CHECK(sig64 != NULL); + ARG_CHECK(seckey != NULL); + if (noncefp == NULL) { + noncefp = secp256k1_nonce_function_default; + } + + 
secp256k1_scalar_set_b32(&sec, seckey, NULL); + while (1) { + unsigned char nonce32[32]; + ret = noncefp(nonce32, msg32, seckey, secp256k1_schnorr_algo16, count, noncedata); + if (!ret) { + break; + } + secp256k1_scalar_set_b32(&non, nonce32, &overflow); + memset(nonce32, 0, 32); + if (!secp256k1_scalar_is_zero(&non) && !overflow) { + if (secp256k1_schnorr_sig_sign(&ctx->ecmult_gen_ctx, sig64, &sec, &non, NULL, secp256k1_schnorr_msghash_sha256, msg32)) { + break; + } + } + count++; + } + if (!ret) { + memset(sig64, 0, 64); + } + secp256k1_scalar_clear(&non); + secp256k1_scalar_clear(&sec); + return ret; +} + +int secp256k1_schnorr_verify(const secp256k1_context_t* ctx, const unsigned char *msg32, const unsigned char *sig64, const secp256k1_pubkey_t *pubkey) { + secp256k1_ge_t q; + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx)); + ARG_CHECK(msg32 != NULL); + ARG_CHECK(sig64 != NULL); + ARG_CHECK(pubkey != NULL); + + secp256k1_pubkey_load(ctx, &q, pubkey); + return secp256k1_schnorr_sig_verify(&ctx->ecmult_ctx, sig64, &q, secp256k1_schnorr_msghash_sha256, msg32); +} + +int secp256k1_schnorr_recover(const secp256k1_context_t* ctx, const unsigned char *msg32, const unsigned char *sig64, secp256k1_pubkey_t *pubkey) { + secp256k1_ge_t q; + + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx)); + ARG_CHECK(msg32 != NULL); + ARG_CHECK(sig64 != NULL); + ARG_CHECK(pubkey != NULL); + + if (secp256k1_schnorr_sig_recover(&ctx->ecmult_ctx, sig64, &q, secp256k1_schnorr_msghash_sha256, msg32)) { + secp256k1_pubkey_save(pubkey, &q); + return 1; + } else { + memset(pubkey, 0, sizeof(*pubkey)); + return 0; + } +} + +int secp256k1_schnorr_generate_nonce_pair(const secp256k1_context_t* ctx, const unsigned char *msg32, const unsigned char *sec32, secp256k1_nonce_function_t noncefp, const void* noncedata, secp256k1_pubkey_t *pubnonce, unsigned char *privnonce32) { + int count = 0; + int ret = 1; + secp256k1_gej_t Qj; + secp256k1_ge_t Q; + secp256k1_scalar_t sec; + + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)); + ARG_CHECK(msg32 != NULL); + ARG_CHECK(sec32 != NULL); + ARG_CHECK(pubnonce != NULL); + ARG_CHECK(privnonce32 != NULL); + + if (noncefp == NULL) { + noncefp = secp256k1_nonce_function_default; + } + + do { + int overflow; + ret = noncefp(privnonce32, msg32, sec32, secp256k1_schnorr_algo16, count++, noncedata); + if (!ret) { + break; + } + secp256k1_scalar_set_b32(&sec, privnonce32, &overflow); + if (overflow || secp256k1_scalar_is_zero(&sec)) { + continue; + } + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &Qj, &sec); + secp256k1_ge_set_gej(&Q, &Qj); + + secp256k1_pubkey_save(pubnonce, &Q); + break; + } while(1); + + secp256k1_scalar_clear(&sec); + if (!ret) { + memset(pubnonce, 0, sizeof(*pubnonce)); + } + return ret; +} + +int secp256k1_schnorr_partial_sign(const secp256k1_context_t* ctx, const unsigned char *msg32, unsigned char *sig64, const unsigned char *sec32, const unsigned char *secnonce32, const secp256k1_pubkey_t *pubnonce_others) { + int overflow = 0; + secp256k1_scalar_t sec, non; + secp256k1_ge_t pubnon; + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)); + ARG_CHECK(msg32 != NULL); + ARG_CHECK(sig64 != NULL); + ARG_CHECK(sec32 != NULL); + ARG_CHECK(secnonce32 != NULL); + ARG_CHECK(pubnonce_others != NULL); + + secp256k1_scalar_set_b32(&sec, sec32, &overflow); + if (overflow || secp256k1_scalar_is_zero(&sec)) { + 
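+        /* Reject secret keys that are zero or not below the group order. */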
return -1; + } + secp256k1_scalar_set_b32(&non, secnonce32, &overflow); + if (overflow || secp256k1_scalar_is_zero(&non)) { + return -1; + } + secp256k1_pubkey_load(ctx, &pubnon, pubnonce_others); + return secp256k1_schnorr_sig_sign(&ctx->ecmult_gen_ctx, sig64, &sec, &non, &pubnon, secp256k1_schnorr_msghash_sha256, msg32); +} + +int secp256k1_schnorr_partial_combine(const secp256k1_context_t* ctx, unsigned char *sig64, int n, const unsigned char * const *sig64sin) { + ARG_CHECK(sig64 != NULL); + ARG_CHECK(n >= 1); + ARG_CHECK(sig64sin != NULL); + return secp256k1_schnorr_sig_combine(sig64, n, sig64sin); +} + +#endif diff --git a/bf/secp256k1/src/modules/schnorr/schnorr.h b/bf/secp256k1/src/modules/schnorr/schnorr.h new file mode 100644 index 0000000..34311ab --- /dev/null +++ b/bf/secp256k1/src/modules/schnorr/schnorr.h @@ -0,0 +1,20 @@ +/*********************************************************************** + * Copyright (c) 2014-2015 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php. * + ***********************************************************************/ + +#ifndef _SECP256K1_SCHNORR_ +#define _SECP256K1_SCHNORR_ + +#include "scalar.h" +#include "group.h" + +typedef void (*secp256k1_schnorr_msghash_t)(unsigned char *h32, const unsigned char *r32, const unsigned char *msg32); + +static int secp256k1_schnorr_sig_sign(const secp256k1_ecmult_gen_context_t* ctx, unsigned char *sig64, const secp256k1_scalar_t *key, const secp256k1_scalar_t *nonce, const secp256k1_ge_t *pubnonce, secp256k1_schnorr_msghash_t hash, const unsigned char *msg32); +static int secp256k1_schnorr_sig_verify(const secp256k1_ecmult_context_t* ctx, const unsigned char *sig64, const secp256k1_ge_t *pubkey, secp256k1_schnorr_msghash_t hash, const unsigned char *msg32); +static int secp256k1_schnorr_sig_recover(const secp256k1_ecmult_context_t* ctx, const unsigned char *sig64, secp256k1_ge_t *pubkey, secp256k1_schnorr_msghash_t hash, const unsigned char *msg32); +static int secp256k1_schnorr_sig_combine(unsigned char *sig64, int n, const unsigned char * const *sig64ins); + +#endif diff --git a/bf/secp256k1/src/modules/schnorr/schnorr_impl.h b/bf/secp256k1/src/modules/schnorr/schnorr_impl.h new file mode 100644 index 0000000..e8e971f --- /dev/null +++ b/bf/secp256k1/src/modules/schnorr/schnorr_impl.h @@ -0,0 +1,207 @@ +/*********************************************************************** + * Copyright (c) 2014-2015 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php. * + ***********************************************************************/ + +#ifndef _SECP256K1_SCHNORR_IMPL_H_ +#define _SECP256K1_SCHNORR_IMPL_H_ + +#include + +#include "schnorr.h" +#include "num.h" +#include "field.h" +#include "group.h" +#include "ecmult.h" +#include "ecmult_gen.h" + +/** + * Custom Schnorr-based signature scheme. They support multiparty signing, public key + * recovery and batch validation. + * + * Rationale for verifying R's y coordinate: + * In order to support batch validation and public key recovery, the full R point must + * be known to verifiers, rather than just its x coordinate. In order to not risk + * being more strict in batch validation than normal validation, validators must be + * required to reject signatures with incorrect y coordinate. 
This is only possible + * by including a (relatively slow) field inverse, or a field square root. However, + * batch validation offers potentially much higher benefits than this cost. + * + * Rationale for having an implicit y coordinate oddness: + * If we commit to having the full R point known to verifiers, there are two mechanism. + * Either include its oddness in the signature, or give it an implicit fixed value. + * As the R y coordinate can be flipped by a simple negation of the nonce, we choose the + * latter, as it comes with nearly zero impact on signing or validation performance, and + * saves a byte in the signature. + * + * Signing: + * Inputs: 32-byte message m, 32-byte scalar key x (!=0), 32-byte scalar nonce k (!=0) + * + * Compute point R = k * G. Reject nonce if R's y coordinate is odd (or negate nonce). + * Compute 32-byte r, the serialization of R's x coordinate. + * Compute scalar h = Hash(r || m). Reject nonce if h == 0 or h >= order. + * Compute scalar s = k - h * x. + * The signature is (r, s). + * + * + * Verification: + * Inputs: 32-byte message m, public key point Q, signature: (32-byte r, scalar s) + * + * Signature is invalid if s >= order. + * Signature is invalid if r >= p. + * Compute scalar h = Hash(r || m). Signature is invalid if h == 0 or h >= order. + * Option 1 (faster for single verification): + * Compute point R = h * Q + s * G. Signature is invalid if R is infinity or R's y coordinate is odd. + * Signature is valid if the serialization of R's x coordinate equals r. + * Option 2 (allows batch validation and pubkey recovery): + * Decompress x coordinate r into point R, with odd y coordinate. Fail if R is not on the curve. + * Signature is valid if R + h * Q + s * G == 0. + */ + +static int secp256k1_schnorr_sig_sign(const secp256k1_ecmult_gen_context_t* ctx, unsigned char *sig64, const secp256k1_scalar_t *key, const secp256k1_scalar_t *nonce, const secp256k1_ge_t *pubnonce, secp256k1_schnorr_msghash_t hash, const unsigned char *msg32) { + secp256k1_gej_t Rj; + secp256k1_ge_t Ra; + unsigned char h32[32]; + secp256k1_scalar_t h, s; + int overflow; + secp256k1_scalar_t n; + + if (secp256k1_scalar_is_zero(key) || secp256k1_scalar_is_zero(nonce)) { + return 0; + } + n = *nonce; + + secp256k1_ecmult_gen(ctx, &Rj, &n); + if (pubnonce) { + secp256k1_gej_add_ge(&Rj, &Rj, pubnonce); + } + secp256k1_ge_set_gej(&Ra, &Rj); + secp256k1_fe_normalize(&Ra.y); + if (secp256k1_fe_is_odd(&Ra.y)) { + /* R's y coordinate is odd, which is not allowed (see rationale above). + Force it to be even by negating the nonce. Note that this even works + for multiparty signing, as the R point is known to all participants, + which can all decide to flip the sign in unison, resulting in the + overall R point to be negated too. 
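+           (Negating the nonce negates R = nonce*G, i.e. it replaces R's y
+           coordinate by p - y, flipping its parity while leaving x unchanged.)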
*/ + secp256k1_scalar_negate(&n, &n); + } + secp256k1_fe_normalize(&Ra.x); + secp256k1_fe_get_b32(sig64, &Ra.x); + hash(h32, sig64, msg32); + overflow = 0; + secp256k1_scalar_set_b32(&h, h32, &overflow); + if (overflow || secp256k1_scalar_is_zero(&h)) { + secp256k1_scalar_clear(&n); + return 0; + } + secp256k1_scalar_mul(&s, &h, key); + secp256k1_scalar_negate(&s, &s); + secp256k1_scalar_add(&s, &s, &n); + secp256k1_scalar_clear(&n); + secp256k1_scalar_get_b32(sig64 + 32, &s); + return 1; +} + +static int secp256k1_schnorr_sig_verify(const secp256k1_ecmult_context_t* ctx, const unsigned char *sig64, const secp256k1_ge_t *pubkey, secp256k1_schnorr_msghash_t hash, const unsigned char *msg32) { + secp256k1_gej_t Qj, Rj; + secp256k1_ge_t Ra; + secp256k1_fe_t Rx; + secp256k1_scalar_t h, s; + unsigned char hh[32]; + int overflow; + + if (secp256k1_ge_is_infinity(pubkey)) { + return 0; + } + hash(hh, sig64, msg32); + overflow = 0; + secp256k1_scalar_set_b32(&h, hh, &overflow); + if (overflow || secp256k1_scalar_is_zero(&h)) { + return 0; + } + overflow = 0; + secp256k1_scalar_set_b32(&s, sig64 + 32, &overflow); + if (overflow) { + return 0; + } + if (!secp256k1_fe_set_b32(&Rx, sig64)) { + return 0; + } + secp256k1_gej_set_ge(&Qj, pubkey); + secp256k1_ecmult(ctx, &Rj, &Qj, &h, &s); + if (secp256k1_gej_is_infinity(&Rj)) { + return 0; + } + secp256k1_ge_set_gej_var(&Ra, &Rj); + secp256k1_fe_normalize_var(&Ra.y); + if (secp256k1_fe_is_odd(&Ra.y)) { + return 0; + } + return secp256k1_fe_equal_var(&Rx, &Ra.x); +} + +static int secp256k1_schnorr_sig_recover(const secp256k1_ecmult_context_t* ctx, const unsigned char *sig64, secp256k1_ge_t *pubkey, secp256k1_schnorr_msghash_t hash, const unsigned char *msg32) { + secp256k1_gej_t Qj, Rj; + secp256k1_ge_t Ra; + secp256k1_fe_t Rx; + secp256k1_scalar_t h, s; + unsigned char hh[32]; + int overflow; + + hash(hh, sig64, msg32); + overflow = 0; + secp256k1_scalar_set_b32(&h, hh, &overflow); + if (overflow || secp256k1_scalar_is_zero(&h)) { + return 0; + } + overflow = 0; + secp256k1_scalar_set_b32(&s, sig64 + 32, &overflow); + if (overflow) { + return 0; + } + if (!secp256k1_fe_set_b32(&Rx, sig64)) { + return 0; + } + if (!secp256k1_ge_set_xo_var(&Ra, &Rx, 0)) { + return 0; + } + secp256k1_gej_set_ge(&Rj, &Ra); + secp256k1_scalar_inverse_var(&h, &h); + secp256k1_scalar_negate(&s, &s); + secp256k1_scalar_mul(&s, &s, &h); + secp256k1_ecmult(ctx, &Qj, &Rj, &h, &s); + if (secp256k1_gej_is_infinity(&Qj)) { + return 0; + } + secp256k1_ge_set_gej(pubkey, &Qj); + return 1; +} + +static int secp256k1_schnorr_sig_combine(unsigned char *sig64, int n, const unsigned char * const *sig64ins) { + secp256k1_scalar_t s = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0); + int i; + for (i = 0; i < n; i++) { + secp256k1_scalar_t si; + int overflow; + secp256k1_scalar_set_b32(&si, sig64ins[i] + 32, &overflow); + if (overflow) { + return -1; + } + if (i) { + if (memcmp(sig64ins[i - 1], sig64ins[i], 32) != 0) { + return -1; + } + } + secp256k1_scalar_add(&s, &s, &si); + } + if (secp256k1_scalar_is_zero(&s)) { + return 0; + } + memcpy(sig64, sig64ins[0], 32); + secp256k1_scalar_get_b32(sig64 + 32, &s); + secp256k1_scalar_clear(&s); + return 1; +} + +#endif diff --git a/bf/secp256k1/src/modules/schnorr/tests_impl.h b/bf/secp256k1/src/modules/schnorr/tests_impl.h new file mode 100644 index 0000000..402eb63 --- /dev/null +++ b/bf/secp256k1/src/modules/schnorr/tests_impl.h @@ -0,0 +1,173 @@ +/********************************************************************** + * Copyright (c) 2014-2015 
Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef SECP256K1_MODULE_SCHNORR_TESTS +#define SECP256K1_MODULE_SCHNORR_TESTS + +void test_schnorr_end_to_end(void) { + unsigned char privkey[32]; + unsigned char message[32]; + unsigned char schnorr_signature[64]; + secp256k1_pubkey_t pubkey, recpubkey; + + /* Generate a random key and message. */ + { + secp256k1_scalar_t key; + random_scalar_order_test(&key); + secp256k1_scalar_get_b32(privkey, &key); + secp256k1_rand256_test(message); + } + + /* Construct and verify corresponding public key. */ + CHECK(secp256k1_ec_seckey_verify(ctx, privkey) == 1); + CHECK(secp256k1_ec_pubkey_create(ctx, &pubkey, privkey) == 1); + + /* Schnorr sign. */ + CHECK(secp256k1_schnorr_sign(ctx, message, schnorr_signature, privkey, NULL, NULL) == 1); + CHECK(secp256k1_schnorr_verify(ctx, message, schnorr_signature, &pubkey) == 1); + CHECK(secp256k1_schnorr_recover(ctx, message, schnorr_signature, &recpubkey) == 1); + CHECK(memcmp(&pubkey, &recpubkey, sizeof(pubkey)) == 0); + /* Destroy signature and verify again. */ + schnorr_signature[secp256k1_rand32() % 64] += 1 + (secp256k1_rand32() % 255); + CHECK(secp256k1_schnorr_verify(ctx, message, schnorr_signature, &pubkey) == 0); + CHECK(secp256k1_schnorr_recover(ctx, message, schnorr_signature, &recpubkey) != 1 || + memcmp(&pubkey, &recpubkey, sizeof(pubkey)) != 0); +} + +/** Horribly broken hash function. Do not use for anything but tests. */ +void test_schnorr_hash(unsigned char *h32, const unsigned char *r32, const unsigned char *msg32) { + int i; + for (i = 0; i < 32; i++) { + h32[i] = r32[i] ^ msg32[i]; + } +} + +void test_schnorr_sign_verify(void) { + unsigned char msg32[32]; + unsigned char sig64[3][64]; + secp256k1_gej_t pubkeyj[3]; + secp256k1_ge_t pubkey[3]; + secp256k1_scalar_t nonce[3], key[3]; + int i = 0; + int k; + + secp256k1_rand256_test(msg32); + + for (k = 0; k < 3; k++) { + random_scalar_order_test(&key[k]); + + do { + random_scalar_order_test(&nonce[k]); + if (secp256k1_schnorr_sig_sign(&ctx->ecmult_gen_ctx, sig64[k], &key[k], &nonce[k], NULL, &test_schnorr_hash, msg32)) { + break; + } + } while(1); + + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &pubkeyj[k], &key[k]); + secp256k1_ge_set_gej_var(&pubkey[k], &pubkeyj[k]); + CHECK(secp256k1_schnorr_sig_verify(&ctx->ecmult_ctx, sig64[k], &pubkey[k], &test_schnorr_hash, msg32)); + + for (i = 0; i < 4; i++) { + int pos = secp256k1_rand32() % 64; + int mod = 1 + (secp256k1_rand32() % 255); + sig64[k][pos] ^= mod; + CHECK(secp256k1_schnorr_sig_verify(&ctx->ecmult_ctx, sig64[k], &pubkey[k], &test_schnorr_hash, msg32) == 0); + sig64[k][pos] ^= mod; + } + } +} + +void test_schnorr_threshold(void) { + unsigned char msg[32]; + unsigned char sec[5][32]; + secp256k1_pubkey_t pub[5]; + unsigned char nonce[5][32]; + secp256k1_pubkey_t pubnonce[5]; + unsigned char sig[5][64]; + const unsigned char* sigs[5]; + unsigned char allsig[64]; + const secp256k1_pubkey_t* pubs[5]; + secp256k1_pubkey_t allpub; + int n, i; + int damage; + int ret = 0; + + damage = (secp256k1_rand32() % 2) ? 
(1 + (secp256k1_rand32() % 4)) : 0; + secp256k1_rand256_test(msg); + n = 2 + (secp256k1_rand32() % 4); + for (i = 0; i < n; i++) { + do { + secp256k1_rand256_test(sec[i]); + } while (!secp256k1_ec_seckey_verify(ctx, sec[i])); + CHECK(secp256k1_ec_pubkey_create(ctx, &pub[i], sec[i])); + CHECK(secp256k1_schnorr_generate_nonce_pair(ctx, msg, sec[i], NULL, NULL, &pubnonce[i], nonce[i])); + pubs[i] = &pub[i]; + } + if (damage == 1) { + nonce[secp256k1_rand32() % n][secp256k1_rand32() % 32] ^= 1 + (secp256k1_rand32() % 255); + } else if (damage == 2) { + sec[secp256k1_rand32() % n][secp256k1_rand32() % 32] ^= 1 + (secp256k1_rand32() % 255); + } + for (i = 0; i < n; i++) { + secp256k1_pubkey_t allpubnonce; + const secp256k1_pubkey_t *pubnonces[4]; + int j; + for (j = 0; j < i; j++) { + pubnonces[j] = &pubnonce[j]; + } + for (j = i + 1; j < n; j++) { + pubnonces[j - 1] = &pubnonce[j]; + } + CHECK(secp256k1_ec_pubkey_combine(ctx, &allpubnonce, n - 1, pubnonces)); + ret |= (secp256k1_schnorr_partial_sign(ctx, msg, sig[i], sec[i], nonce[i], &allpubnonce) != 1) * 1; + sigs[i] = sig[i]; + } + if (damage == 3) { + sig[secp256k1_rand32() % n][secp256k1_rand32() % 64] ^= 1 + (secp256k1_rand32() % 255); + } + ret |= (secp256k1_ec_pubkey_combine(ctx, &allpub, n, pubs) != 1) * 2; + if ((ret & 1) == 0) { + ret |= (secp256k1_schnorr_partial_combine(ctx, allsig, n, sigs) != 1) * 4; + } + if (damage == 4) { + allsig[secp256k1_rand32() % 32] ^= 1 + (secp256k1_rand32() % 255); + } + if ((ret & 7) == 0) { + ret |= (secp256k1_schnorr_verify(ctx, msg, allsig, &allpub) != 1) * 8; + } + CHECK((ret == 0) == (damage == 0)); +} + +void test_schnorr_recovery(void) { + unsigned char msg32[32]; + unsigned char sig64[64]; + secp256k1_ge_t Q; + + secp256k1_rand256_test(msg32); + secp256k1_rand256_test(sig64); + secp256k1_rand256_test(sig64 + 32); + if (secp256k1_schnorr_sig_recover(&ctx->ecmult_ctx, sig64, &Q, &test_schnorr_hash, msg32) == 1) { + CHECK(secp256k1_schnorr_sig_verify(&ctx->ecmult_ctx, sig64, &Q, &test_schnorr_hash, msg32) == 1); + } +} + +void run_schnorr_tests(void) { + int i; + for (i = 0; i < 32*count; i++) { + test_schnorr_end_to_end(); + } + for (i = 0; i < 32 * count; i++) { + test_schnorr_sign_verify(); + } + for (i = 0; i < 16 * count; i++) { + test_schnorr_recovery(); + } + for (i = 0; i < 10 * count; i++) { + test_schnorr_threshold(); + } +} + +#endif diff --git a/bf/secp256k1/src/num.h b/bf/secp256k1/src/num.h new file mode 100644 index 0000000..339b6bb --- /dev/null +++ b/bf/secp256k1/src/num.h @@ -0,0 +1,68 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_NUM_ +#define _SECP256K1_NUM_ + +#ifndef USE_NUM_NONE + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +#if defined(USE_NUM_GMP) +#include "num_gmp.h" +#else +#error "Please select num implementation" +#endif + +/** Copy a number. */ +static void secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a); + +/** Convert a number's absolute value to a binary big-endian string. + * There must be enough place. */ +static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a); + +/** Set a number to the value of a binary big-endian string. 
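+ * The input is interpreted as unsigned; the resulting number is non-negative.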
*/ +static void secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen); + +/** Compute a modular inverse. The input must be less than the modulus. */ +static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m); + +/** Compare the absolute value of two numbers. */ +static int secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b); + +/** Test whether two number are equal (including sign). */ +static int secp256k1_num_eq(const secp256k1_num_t *a, const secp256k1_num_t *b); + +/** Add two (signed) numbers. */ +static void secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b); + +/** Subtract two (signed) numbers. */ +static void secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b); + +/** Multiply two (signed) numbers. */ +static void secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b); + +/** Replace a number by its remainder modulo m. M's sign is ignored. The result is a number between 0 and m-1, + even if r was negative. */ +static void secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m); + +/** Right-shift the passed number by bits bits. */ +static void secp256k1_num_shift(secp256k1_num_t *r, int bits); + +/** Check whether a number is zero. */ +static int secp256k1_num_is_zero(const secp256k1_num_t *a); + +/** Check whether a number is strictly negative. */ +static int secp256k1_num_is_neg(const secp256k1_num_t *a); + +/** Change a number's sign. */ +static void secp256k1_num_negate(secp256k1_num_t *r); + +#endif + +#endif diff --git a/bf/secp256k1/src/num_gmp.h b/bf/secp256k1/src/num_gmp.h new file mode 100644 index 0000000..baa1f2b --- /dev/null +++ b/bf/secp256k1/src/num_gmp.h @@ -0,0 +1,20 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_NUM_REPR_ +#define _SECP256K1_NUM_REPR_ + +#include + +#define NUM_LIMBS ((256+GMP_NUMB_BITS-1)/GMP_NUMB_BITS) + +typedef struct { + mp_limb_t data[2*NUM_LIMBS]; + int neg; + int limbs; +} secp256k1_num_t; + +#endif diff --git a/bf/secp256k1/src/num_gmp_impl.h b/bf/secp256k1/src/num_gmp_impl.h new file mode 100644 index 0000000..dbbc458 --- /dev/null +++ b/bf/secp256k1/src/num_gmp_impl.h @@ -0,0 +1,260 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_NUM_REPR_IMPL_H_ +#define _SECP256K1_NUM_REPR_IMPL_H_ + +#include +#include +#include + +#include "util.h" +#include "num.h" + +#ifdef VERIFY +static void secp256k1_num_sanity(const secp256k1_num_t *a) { + VERIFY_CHECK(a->limbs == 1 || (a->limbs > 1 && a->data[a->limbs-1] != 0)); +} +#else +#define secp256k1_num_sanity(a) do { } while(0) +#endif + +static void secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a) { + *r = *a; +} + +static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a) { + unsigned char tmp[65]; + int len = 
0; + int shift = 0; + if (a->limbs>1 || a->data[0] != 0) { + len = mpn_get_str(tmp, 256, (mp_limb_t*)a->data, a->limbs); + } + while (shift < len && tmp[shift] == 0) shift++; + VERIFY_CHECK(len-shift <= (int)rlen); + memset(r, 0, rlen - len + shift); + if (len > shift) { + memcpy(r + rlen - len + shift, tmp + shift, len - shift); + } + memset(tmp, 0, sizeof(tmp)); +} + +static void secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen) { + int len; + VERIFY_CHECK(alen > 0); + VERIFY_CHECK(alen <= 64); + len = mpn_set_str(r->data, a, alen, 256); + if (len == 0) { + r->data[0] = 0; + len = 1; + } + VERIFY_CHECK(len <= NUM_LIMBS*2); + r->limbs = len; + r->neg = 0; + while (r->limbs > 1 && r->data[r->limbs-1]==0) { + r->limbs--; + } +} + +static void secp256k1_num_add_abs(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { + mp_limb_t c = mpn_add(r->data, a->data, a->limbs, b->data, b->limbs); + r->limbs = a->limbs; + if (c != 0) { + VERIFY_CHECK(r->limbs < 2*NUM_LIMBS); + r->data[r->limbs++] = c; + } +} + +static void secp256k1_num_sub_abs(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { + mp_limb_t c = mpn_sub(r->data, a->data, a->limbs, b->data, b->limbs); + VERIFY_CHECK(c == 0); + r->limbs = a->limbs; + while (r->limbs > 1 && r->data[r->limbs-1]==0) { + r->limbs--; + } +} + +static void secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m) { + secp256k1_num_sanity(r); + secp256k1_num_sanity(m); + + if (r->limbs >= m->limbs) { + mp_limb_t t[2*NUM_LIMBS]; + mpn_tdiv_qr(t, r->data, 0, r->data, r->limbs, m->data, m->limbs); + memset(t, 0, sizeof(t)); + r->limbs = m->limbs; + while (r->limbs > 1 && r->data[r->limbs-1]==0) { + r->limbs--; + } + } + + if (r->neg && (r->limbs > 1 || r->data[0] != 0)) { + secp256k1_num_sub_abs(r, m, r); + r->neg = 0; + } +} + +static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m) { + int i; + mp_limb_t g[NUM_LIMBS+1]; + mp_limb_t u[NUM_LIMBS+1]; + mp_limb_t v[NUM_LIMBS+1]; + mp_size_t sn; + mp_size_t gn; + secp256k1_num_sanity(a); + secp256k1_num_sanity(m); + + /** mpn_gcdext computes: (G,S) = gcdext(U,V), where + * * G = gcd(U,V) + * * G = U*S + V*T + * * U has equal or more limbs than V, and V has no padding + * If we set U to be (a padded version of) a, and V = m: + * G = a*S + m*T + * G = a*S mod m + * Assuming G=1: + * S = 1/a mod m + */ + VERIFY_CHECK(m->limbs <= NUM_LIMBS); + VERIFY_CHECK(m->data[m->limbs-1] != 0); + for (i = 0; i < m->limbs; i++) { + u[i] = (i < a->limbs) ? 
a->data[i] : 0; + v[i] = m->data[i]; + } + sn = NUM_LIMBS+1; + gn = mpn_gcdext(g, r->data, &sn, u, m->limbs, v, m->limbs); + VERIFY_CHECK(gn == 1); + VERIFY_CHECK(g[0] == 1); + r->neg = a->neg ^ m->neg; + if (sn < 0) { + mpn_sub(r->data, m->data, m->limbs, r->data, -sn); + r->limbs = m->limbs; + while (r->limbs > 1 && r->data[r->limbs-1]==0) { + r->limbs--; + } + } else { + r->limbs = sn; + } + memset(g, 0, sizeof(g)); + memset(u, 0, sizeof(u)); + memset(v, 0, sizeof(v)); +} + +static int secp256k1_num_is_zero(const secp256k1_num_t *a) { + return (a->limbs == 1 && a->data[0] == 0); +} + +static int secp256k1_num_is_neg(const secp256k1_num_t *a) { + return (a->limbs > 1 || a->data[0] != 0) && a->neg; +} + +static int secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b) { + if (a->limbs > b->limbs) { + return 1; + } + if (a->limbs < b->limbs) { + return -1; + } + return mpn_cmp(a->data, b->data, a->limbs); +} + +static int secp256k1_num_eq(const secp256k1_num_t *a, const secp256k1_num_t *b) { + if (a->limbs > b->limbs) { + return 0; + } + if (a->limbs < b->limbs) { + return 0; + } + if ((a->neg && !secp256k1_num_is_zero(a)) != (b->neg && !secp256k1_num_is_zero(b))) { + return 0; + } + return mpn_cmp(a->data, b->data, a->limbs) == 0; +} + +static void secp256k1_num_subadd(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, int bneg) { + if (!(b->neg ^ bneg ^ a->neg)) { /* a and b have the same sign */ + r->neg = a->neg; + if (a->limbs >= b->limbs) { + secp256k1_num_add_abs(r, a, b); + } else { + secp256k1_num_add_abs(r, b, a); + } + } else { + if (secp256k1_num_cmp(a, b) > 0) { + r->neg = a->neg; + secp256k1_num_sub_abs(r, a, b); + } else { + r->neg = b->neg ^ bneg; + secp256k1_num_sub_abs(r, b, a); + } + } +} + +static void secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { + secp256k1_num_sanity(a); + secp256k1_num_sanity(b); + secp256k1_num_subadd(r, a, b, 0); +} + +static void secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { + secp256k1_num_sanity(a); + secp256k1_num_sanity(b); + secp256k1_num_subadd(r, a, b, 1); +} + +static void secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { + mp_limb_t tmp[2*NUM_LIMBS+1]; + secp256k1_num_sanity(a); + secp256k1_num_sanity(b); + + VERIFY_CHECK(a->limbs + b->limbs <= 2*NUM_LIMBS+1); + if ((a->limbs==1 && a->data[0]==0) || (b->limbs==1 && b->data[0]==0)) { + r->limbs = 1; + r->neg = 0; + r->data[0] = 0; + return; + } + if (a->limbs >= b->limbs) { + mpn_mul(tmp, a->data, a->limbs, b->data, b->limbs); + } else { + mpn_mul(tmp, b->data, b->limbs, a->data, a->limbs); + } + r->limbs = a->limbs + b->limbs; + if (r->limbs > 1 && tmp[r->limbs - 1]==0) { + r->limbs--; + } + VERIFY_CHECK(r->limbs <= 2*NUM_LIMBS); + mpn_copyi(r->data, tmp, r->limbs); + r->neg = a->neg ^ b->neg; + memset(tmp, 0, sizeof(tmp)); +} + +static void secp256k1_num_shift(secp256k1_num_t *r, int bits) { + int i; + if (bits % GMP_NUMB_BITS) { + /* Shift within limbs. */ + mpn_rshift(r->data, r->data, r->limbs, bits % GMP_NUMB_BITS); + } + if (bits >= GMP_NUMB_BITS) { + /* Shift full limbs. 
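+           Limb i takes the value of limb i + bits/GMP_NUMB_BITS; limbs read
+           from beyond the end of the number become zero.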
*/ + for (i = 0; i < r->limbs; i++) { + int index = i + (bits / GMP_NUMB_BITS); + if (index < r->limbs && index < 2*NUM_LIMBS) { + r->data[i] = r->data[index]; + } else { + r->data[i] = 0; + } + } + } + while (r->limbs>1 && r->data[r->limbs-1]==0) { + r->limbs--; + } +} + +static void secp256k1_num_negate(secp256k1_num_t *r) { + r->neg ^= 1; +} + +#endif diff --git a/bf/secp256k1/src/num_impl.h b/bf/secp256k1/src/num_impl.h new file mode 100644 index 0000000..0b0e3a0 --- /dev/null +++ b/bf/secp256k1/src/num_impl.h @@ -0,0 +1,24 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_NUM_IMPL_H_ +#define _SECP256K1_NUM_IMPL_H_ + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +#include "num.h" + +#if defined(USE_NUM_GMP) +#include "num_gmp_impl.h" +#elif defined(USE_NUM_NONE) +/* Nothing. */ +#else +#error "Please select num implementation" +#endif + +#endif diff --git a/bf/secp256k1/src/scalar.h b/bf/secp256k1/src/scalar.h new file mode 100644 index 0000000..72e12b8 --- /dev/null +++ b/bf/secp256k1/src/scalar.h @@ -0,0 +1,104 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_SCALAR_ +#define _SECP256K1_SCALAR_ + +#include "num.h" + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +#if defined(USE_SCALAR_4X64) +#include "scalar_4x64.h" +#elif defined(USE_SCALAR_8X32) +#include "scalar_8x32.h" +#else +#error "Please select scalar implementation" +#endif + +/** Clear a scalar to prevent the leak of sensitive data. */ +static void secp256k1_scalar_clear(secp256k1_scalar_t *r); + +/** Access bits from a scalar. All requested bits must belong to the same 32-bit limb. */ +static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count); + +/** Access bits from a scalar. Not constant time. */ +static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count); + +/** Set a scalar from a big endian byte array. */ +static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *bin, int *overflow); + +/** Set a scalar to an unsigned integer. */ +static void secp256k1_scalar_set_int(secp256k1_scalar_t *r, unsigned int v); + +/** Convert a scalar to a byte array. */ +static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a); + +/** Add two scalars together (modulo the group order). Returns whether it overflowed. */ +static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b); + +/** Conditionally add a power of two to a scalar. The result is not allowed to overflow. */ +static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag); + +/** Multiply two scalars (modulo the group order). 
*/ +static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b); + +/** Shift a scalar right by some amount strictly between 0 and 16, returning + * the low bits that were shifted off */ +static int secp256k1_scalar_shr_int(secp256k1_scalar_t *r, int n); + +/** Compute the square of a scalar (modulo the group order). */ +static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a); + +/** Compute the inverse of a scalar (modulo the group order). */ +static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scalar_t *a); + +/** Compute the inverse of a scalar (modulo the group order), without constant-time guarantee. */ +static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a); + +/** Compute the complement of a scalar (modulo the group order). */ +static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a); + +/** Check whether a scalar equals zero. */ +static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a); + +/** Check whether a scalar equals one. */ +static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a); + +/** Check whether a scalar, considered as an nonnegative integer, is even. */ +static int secp256k1_scalar_is_even(const secp256k1_scalar_t *a); + +/** Check whether a scalar is higher than the group order divided by 2. */ +static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a); + +/** Conditionally negate a number, in constant time. + * Returns -1 if the number was negated, 1 otherwise */ +static int secp256k1_scalar_cond_negate(secp256k1_scalar_t *a, int flag); + +#ifndef USE_NUM_NONE +/** Convert a scalar to a number. */ +static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_t *a); + +/** Get the order of the group as a number. */ +static void secp256k1_scalar_order_get_num(secp256k1_num_t *r); +#endif + +/** Compare two scalars. */ +static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b); + +#ifdef USE_ENDOMORPHISM +/** Find r1 and r2 such that r1+r2*2^128 = a. */ +static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a); +/** Find r1 and r2 such that r1+r2*lambda = a, and r1 and r2 are maximum 128 bits long (see secp256k1_gej_mul_lambda). */ +static void secp256k1_scalar_split_lambda(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a); +#endif + +/** Multiply a and b (without taking the modulus!), divide by 2**shift, and round to the nearest integer. Shift must be at least 256. */ +static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift); + +#endif diff --git a/bf/secp256k1/src/scalar_4x64.h b/bf/secp256k1/src/scalar_4x64.h new file mode 100644 index 0000000..82899aa --- /dev/null +++ b/bf/secp256k1/src/scalar_4x64.h @@ -0,0 +1,19 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_SCALAR_REPR_ +#define _SECP256K1_SCALAR_REPR_ + +#include + +/** A scalar modulo the group order of the secp256k1 curve. 
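+ *  Represented as four 64-bit limbs in little-endian limb order: d[0] holds
+ *  the least significant 64 bits.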
*/ +typedef struct { + uint64_t d[4]; +} secp256k1_scalar_t; + +#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{((uint64_t)(d1)) << 32 | (d0), ((uint64_t)(d3)) << 32 | (d2), ((uint64_t)(d5)) << 32 | (d4), ((uint64_t)(d7)) << 32 | (d6)}} + +#endif diff --git a/bf/secp256k1/src/scalar_4x64_impl.h b/bf/secp256k1/src/scalar_4x64_impl.h new file mode 100644 index 0000000..09431e0 --- /dev/null +++ b/bf/secp256k1/src/scalar_4x64_impl.h @@ -0,0 +1,947 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_SCALAR_REPR_IMPL_H_ +#define _SECP256K1_SCALAR_REPR_IMPL_H_ + +/* Limbs of the secp256k1 order. */ +#define SECP256K1_N_0 ((uint64_t)0xBFD25E8CD0364141ULL) +#define SECP256K1_N_1 ((uint64_t)0xBAAEDCE6AF48A03BULL) +#define SECP256K1_N_2 ((uint64_t)0xFFFFFFFFFFFFFFFEULL) +#define SECP256K1_N_3 ((uint64_t)0xFFFFFFFFFFFFFFFFULL) + +/* Limbs of 2^256 minus the secp256k1 order. */ +#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1) +#define SECP256K1_N_C_1 (~SECP256K1_N_1) +#define SECP256K1_N_C_2 (1) + +/* Limbs of half the secp256k1 order. */ +#define SECP256K1_N_H_0 ((uint64_t)0xDFE92F46681B20A0ULL) +#define SECP256K1_N_H_1 ((uint64_t)0x5D576E7357A4501DULL) +#define SECP256K1_N_H_2 ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +#define SECP256K1_N_H_3 ((uint64_t)0x7FFFFFFFFFFFFFFFULL) + +SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar_t *r) { + r->d[0] = 0; + r->d[1] = 0; + r->d[2] = 0; + r->d[3] = 0; +} + +SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar_t *r, unsigned int v) { + r->d[0] = v; + r->d[1] = 0; + r->d[2] = 0; + r->d[3] = 0; +} + +SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) { + VERIFY_CHECK((offset + count - 1) >> 6 == offset >> 6); + return (a->d[offset >> 6] >> (offset & 0x3F)) & ((((uint64_t)1) << count) - 1); +} + +SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) { + VERIFY_CHECK(count < 32); + VERIFY_CHECK(offset + count <= 256); + if ((offset + count - 1) >> 6 == offset >> 6) { + return secp256k1_scalar_get_bits(a, offset, count); + } else { + VERIFY_CHECK((offset >> 6) + 1 < 4); + return ((a->d[offset >> 6] >> (offset & 0x3F)) | (a->d[(offset >> 6) + 1] << (64 - (offset & 0x3F)))) & ((((uint64_t)1) << count) - 1); + } +} + +SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) { + int yes = 0; + int no = 0; + no |= (a->d[3] < SECP256K1_N_3); /* No need for a > check. 
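+       (SECP256K1_N_3 is the maximum 64-bit value, so d[3] can never exceed it.)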
*/ + no |= (a->d[2] < SECP256K1_N_2); + yes |= (a->d[2] > SECP256K1_N_2) & ~no; + no |= (a->d[1] < SECP256K1_N_1); + yes |= (a->d[1] > SECP256K1_N_1) & ~no; + yes |= (a->d[0] >= SECP256K1_N_0) & ~no; + return yes; +} + +SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, unsigned int overflow) { + uint128_t t; + VERIFY_CHECK(overflow <= 1); + t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0; + r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1; + r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + t += (uint128_t)r->d[2] + overflow * SECP256K1_N_C_2; + r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + t += (uint64_t)r->d[3]; + r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; + return overflow; +} + +static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { + int overflow; + uint128_t t = (uint128_t)a->d[0] + b->d[0]; + r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + t += (uint128_t)a->d[1] + b->d[1]; + r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + t += (uint128_t)a->d[2] + b->d[2]; + r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + t += (uint128_t)a->d[3] + b->d[3]; + r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + overflow = t + secp256k1_scalar_check_overflow(r); + VERIFY_CHECK(overflow == 0 || overflow == 1); + secp256k1_scalar_reduce(r, overflow); + return overflow; +} + +static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag) { + uint128_t t; + VERIFY_CHECK(bit < 256); + bit += ((uint32_t) flag - 1) & 0x100; /* forcing (bit >> 6) > 3 makes this a noop */ + t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F)); + r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F)); + r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + t += (uint128_t)r->d[2] + (((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F)); + r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; + t += (uint128_t)r->d[3] + (((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F)); + r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; +#ifdef VERIFY + VERIFY_CHECK((t >> 64) == 0); + VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0); +#endif +} + +static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) { + int over; + r->d[0] = (uint64_t)b32[31] | (uint64_t)b32[30] << 8 | (uint64_t)b32[29] << 16 | (uint64_t)b32[28] << 24 | (uint64_t)b32[27] << 32 | (uint64_t)b32[26] << 40 | (uint64_t)b32[25] << 48 | (uint64_t)b32[24] << 56; + r->d[1] = (uint64_t)b32[23] | (uint64_t)b32[22] << 8 | (uint64_t)b32[21] << 16 | (uint64_t)b32[20] << 24 | (uint64_t)b32[19] << 32 | (uint64_t)b32[18] << 40 | (uint64_t)b32[17] << 48 | (uint64_t)b32[16] << 56; + r->d[2] = (uint64_t)b32[15] | (uint64_t)b32[14] << 8 | (uint64_t)b32[13] << 16 | (uint64_t)b32[12] << 24 | (uint64_t)b32[11] << 32 | (uint64_t)b32[10] << 40 | (uint64_t)b32[9] << 48 | (uint64_t)b32[8] << 56; + r->d[3] = (uint64_t)b32[7] | (uint64_t)b32[6] << 8 | (uint64_t)b32[5] << 16 | (uint64_t)b32[4] << 24 | (uint64_t)b32[3] << 32 | (uint64_t)b32[2] << 40 | (uint64_t)b32[1] << 48 | (uint64_t)b32[0] << 56; + over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r)); + if (overflow) { + *overflow = over; + } +} + +static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a) { + bin[0] = a->d[3] >> 56; bin[1] = a->d[3] >> 48; bin[2] = a->d[3] >> 40; bin[3] = a->d[3] >> 32; bin[4] = a->d[3] >> 24; bin[5] = a->d[3] >> 
16; bin[6] = a->d[3] >> 8; bin[7] = a->d[3]; + bin[8] = a->d[2] >> 56; bin[9] = a->d[2] >> 48; bin[10] = a->d[2] >> 40; bin[11] = a->d[2] >> 32; bin[12] = a->d[2] >> 24; bin[13] = a->d[2] >> 16; bin[14] = a->d[2] >> 8; bin[15] = a->d[2]; + bin[16] = a->d[1] >> 56; bin[17] = a->d[1] >> 48; bin[18] = a->d[1] >> 40; bin[19] = a->d[1] >> 32; bin[20] = a->d[1] >> 24; bin[21] = a->d[1] >> 16; bin[22] = a->d[1] >> 8; bin[23] = a->d[1]; + bin[24] = a->d[0] >> 56; bin[25] = a->d[0] >> 48; bin[26] = a->d[0] >> 40; bin[27] = a->d[0] >> 32; bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0]; +} + +SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a) { + return (a->d[0] | a->d[1] | a->d[2] | a->d[3]) == 0; +} + +static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) { + uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0); + uint128_t t = (uint128_t)(~a->d[0]) + SECP256K1_N_0 + 1; + r->d[0] = t & nonzero; t >>= 64; + t += (uint128_t)(~a->d[1]) + SECP256K1_N_1; + r->d[1] = t & nonzero; t >>= 64; + t += (uint128_t)(~a->d[2]) + SECP256K1_N_2; + r->d[2] = t & nonzero; t >>= 64; + t += (uint128_t)(~a->d[3]) + SECP256K1_N_3; + r->d[3] = t & nonzero; +} + +SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a) { + return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3]) == 0; +} + +static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) { + int yes = 0; + int no = 0; + no |= (a->d[3] < SECP256K1_N_H_3); + yes |= (a->d[3] > SECP256K1_N_H_3) & ~no; + no |= (a->d[2] < SECP256K1_N_H_2) & ~yes; /* No need for a > check. */ + no |= (a->d[1] < SECP256K1_N_H_1) & ~yes; + yes |= (a->d[1] > SECP256K1_N_H_1) & ~no; + yes |= (a->d[0] > SECP256K1_N_H_0) & ~no; + return yes; +} + +static int secp256k1_scalar_cond_negate(secp256k1_scalar_t *r, int flag) { + /* If we are flag = 0, mask = 00...00 and this is a no-op; + * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */ + uint64_t mask = !flag - 1; + uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1; + uint128_t t = (uint128_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask); + r->d[0] = t & nonzero; t >>= 64; + t += (uint128_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask); + r->d[1] = t & nonzero; t >>= 64; + t += (uint128_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask); + r->d[2] = t & nonzero; t >>= 64; + t += (uint128_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask); + r->d[3] = t & nonzero; + return 2 * (mask == 0) - 1; +} + +/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */ + +/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */ +#define muladd(a,b) { \ + uint64_t tl, th; \ + { \ + uint128_t t = (uint128_t)a * b; \ + th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \ + tl = t; \ + } \ + c0 += tl; /* overflow is handled on the next line */ \ + th += (c0 < tl) ? 1 : 0; /* at most 0xFFFFFFFFFFFFFFFF */ \ + c1 += th; /* overflow is handled on the next line */ \ + c2 += (c1 < th) ? 1 : 0; /* never overflows by contract (verified in the next line) */ \ + VERIFY_CHECK((c1 >= th) || (c2 != 0)); \ +} + +/** Add a*b to the number defined by (c0,c1). c1 must never overflow. */ +#define muladd_fast(a,b) { \ + uint64_t tl, th; \ + { \ + uint128_t t = (uint128_t)a * b; \ + th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \ + tl = t; \ + } \ + c0 += tl; /* overflow is handled on the next line */ \ + th += (c0 < tl) ? 
1 : 0; /* at most 0xFFFFFFFFFFFFFFFF */ \ + c1 += th; /* never overflows by contract (verified in the next line) */ \ + VERIFY_CHECK(c1 >= th); \ +} + +/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */ +#define muladd2(a,b) { \ + uint64_t tl, th, th2, tl2; \ + { \ + uint128_t t = (uint128_t)a * b; \ + th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \ + tl = t; \ + } \ + th2 = th + th; /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \ + c2 += (th2 < th) ? 1 : 0; /* never overflows by contract (verified the next line) */ \ + VERIFY_CHECK((th2 >= th) || (c2 != 0)); \ + tl2 = tl + tl; /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \ + th2 += (tl2 < tl) ? 1 : 0; /* at most 0xFFFFFFFFFFFFFFFF */ \ + c0 += tl2; /* overflow is handled on the next line */ \ + th2 += (c0 < tl2) ? 1 : 0; /* second overflow is handled on the next line */ \ + c2 += (c0 < tl2) & (th2 == 0); /* never overflows by contract (verified the next line) */ \ + VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \ + c1 += th2; /* overflow is handled on the next line */ \ + c2 += (c1 < th2) ? 1 : 0; /* never overflows by contract (verified the next line) */ \ + VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \ +} + +/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */ +#define sumadd(a) { \ + unsigned int over; \ + c0 += (a); /* overflow is handled on the next line */ \ + over = (c0 < (a)) ? 1 : 0; \ + c1 += over; /* overflow is handled on the next line */ \ + c2 += (c1 < over) ? 1 : 0; /* never overflows by contract */ \ +} + +/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */ +#define sumadd_fast(a) { \ + c0 += (a); /* overflow is handled on the next line */ \ + c1 += (c0 < (a)) ? 1 : 0; /* never overflows by contract (verified the next line) */ \ + VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \ + VERIFY_CHECK(c2 == 0); \ +} + +/** Extract the lowest 64 bits of (c0,c1,c2) into n, and left shift the number 64 bits. */ +#define extract(n) { \ + (n) = c0; \ + c0 = c1; \ + c1 = c2; \ + c2 = 0; \ +} + +/** Extract the lowest 64 bits of (c0,c1,c2) into n, and left shift the number 64 bits. c2 is required to be zero. */ +#define extract_fast(n) { \ + (n) = c0; \ + c0 = c1; \ + c1 = 0; \ + VERIFY_CHECK(c2 == 0); \ +} + +static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint64_t *l) { +#ifdef USE_ASM_X86_64 + /* Reduce 512 bits into 385. */ + uint64_t m0, m1, m2, m3, m4, m5, m6; + uint64_t p0, p1, p2, p3, p4; + uint64_t c; + + __asm__ __volatile__( + /* Preload. 
*/ + "movq 32(%%rsi), %%r11\n" + "movq 40(%%rsi), %%r12\n" + "movq 48(%%rsi), %%r13\n" + "movq 56(%%rsi), %%r14\n" + /* Initialize r8,r9,r10 */ + "movq 0(%%rsi), %%r8\n" + "movq $0, %%r9\n" + "movq $0, %%r10\n" + /* (r8,r9) += n0 * c0 */ + "movq %8, %%rax\n" + "mulq %%r11\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + /* extract m0 */ + "movq %%r8, %q0\n" + "movq $0, %%r8\n" + /* (r9,r10) += l1 */ + "addq 8(%%rsi), %%r9\n" + "adcq $0, %%r10\n" + /* (r9,r10,r8) += n1 * c0 */ + "movq %8, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* (r9,r10,r8) += n0 * c1 */ + "movq %9, %%rax\n" + "mulq %%r11\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* extract m1 */ + "movq %%r9, %q1\n" + "movq $0, %%r9\n" + /* (r10,r8,r9) += l2 */ + "addq 16(%%rsi), %%r10\n" + "adcq $0, %%r8\n" + "adcq $0, %%r9\n" + /* (r10,r8,r9) += n2 * c0 */ + "movq %8, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* (r10,r8,r9) += n1 * c1 */ + "movq %9, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* (r10,r8,r9) += n0 */ + "addq %%r11, %%r10\n" + "adcq $0, %%r8\n" + "adcq $0, %%r9\n" + /* extract m2 */ + "movq %%r10, %q2\n" + "movq $0, %%r10\n" + /* (r8,r9,r10) += l3 */ + "addq 24(%%rsi), %%r8\n" + "adcq $0, %%r9\n" + "adcq $0, %%r10\n" + /* (r8,r9,r10) += n3 * c0 */ + "movq %8, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* (r8,r9,r10) += n2 * c1 */ + "movq %9, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* (r8,r9,r10) += n1 */ + "addq %%r12, %%r8\n" + "adcq $0, %%r9\n" + "adcq $0, %%r10\n" + /* extract m3 */ + "movq %%r8, %q3\n" + "movq $0, %%r8\n" + /* (r9,r10,r8) += n3 * c1 */ + "movq %9, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* (r9,r10,r8) += n2 */ + "addq %%r13, %%r9\n" + "adcq $0, %%r10\n" + "adcq $0, %%r8\n" + /* extract m4 */ + "movq %%r9, %q4\n" + /* (r10,r8) += n3 */ + "addq %%r14, %%r10\n" + "adcq $0, %%r8\n" + /* extract m5 */ + "movq %%r10, %q5\n" + /* extract m6 */ + "movq %%r8, %q6\n" + : "=g"(m0), "=g"(m1), "=g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6) + : "S"(l), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1) + : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc"); + + /* Reduce 385 bits into 258. 
*/ + __asm__ __volatile__( + /* Preload */ + "movq %q9, %%r11\n" + "movq %q10, %%r12\n" + "movq %q11, %%r13\n" + /* Initialize (r8,r9,r10) */ + "movq %q5, %%r8\n" + "movq $0, %%r9\n" + "movq $0, %%r10\n" + /* (r8,r9) += m4 * c0 */ + "movq %12, %%rax\n" + "mulq %%r11\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + /* extract p0 */ + "movq %%r8, %q0\n" + "movq $0, %%r8\n" + /* (r9,r10) += m1 */ + "addq %q6, %%r9\n" + "adcq $0, %%r10\n" + /* (r9,r10,r8) += m5 * c0 */ + "movq %12, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* (r9,r10,r8) += m4 * c1 */ + "movq %13, %%rax\n" + "mulq %%r11\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* extract p1 */ + "movq %%r9, %q1\n" + "movq $0, %%r9\n" + /* (r10,r8,r9) += m2 */ + "addq %q7, %%r10\n" + "adcq $0, %%r8\n" + "adcq $0, %%r9\n" + /* (r10,r8,r9) += m6 * c0 */ + "movq %12, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* (r10,r8,r9) += m5 * c1 */ + "movq %13, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* (r10,r8,r9) += m4 */ + "addq %%r11, %%r10\n" + "adcq $0, %%r8\n" + "adcq $0, %%r9\n" + /* extract p2 */ + "movq %%r10, %q2\n" + /* (r8,r9) += m3 */ + "addq %q8, %%r8\n" + "adcq $0, %%r9\n" + /* (r8,r9) += m6 * c1 */ + "movq %13, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + /* (r8,r9) += m5 */ + "addq %%r12, %%r8\n" + "adcq $0, %%r9\n" + /* extract p3 */ + "movq %%r8, %q3\n" + /* (r9) += m6 */ + "addq %%r13, %%r9\n" + /* extract p4 */ + "movq %%r9, %q4\n" + : "=&g"(p0), "=&g"(p1), "=&g"(p2), "=g"(p3), "=g"(p4) + : "g"(m0), "g"(m1), "g"(m2), "g"(m3), "g"(m4), "g"(m5), "g"(m6), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1) + : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "cc"); + + /* Reduce 258 bits into 256. */ + __asm__ __volatile__( + /* Preload */ + "movq %q5, %%r10\n" + /* (rax,rdx) = p4 * c0 */ + "movq %7, %%rax\n" + "mulq %%r10\n" + /* (rax,rdx) += p0 */ + "addq %q1, %%rax\n" + "adcq $0, %%rdx\n" + /* extract r0 */ + "movq %%rax, 0(%q6)\n" + /* Move to (r8,r9) */ + "movq %%rdx, %%r8\n" + "movq $0, %%r9\n" + /* (r8,r9) += p1 */ + "addq %q2, %%r8\n" + "adcq $0, %%r9\n" + /* (r8,r9) += p4 * c1 */ + "movq %8, %%rax\n" + "mulq %%r10\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + /* Extract r1 */ + "movq %%r8, 8(%q6)\n" + "movq $0, %%r8\n" + /* (r9,r8) += p4 */ + "addq %%r10, %%r9\n" + "adcq $0, %%r8\n" + /* (r9,r8) += p2 */ + "addq %q3, %%r9\n" + "adcq $0, %%r8\n" + /* Extract r2 */ + "movq %%r9, 16(%q6)\n" + "movq $0, %%r9\n" + /* (r8,r9) += p3 */ + "addq %q4, %%r8\n" + "adcq $0, %%r9\n" + /* Extract r3 */ + "movq %%r8, 24(%q6)\n" + /* Extract c */ + "movq %%r9, %q0\n" + : "=g"(c) + : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1) + : "rax", "rdx", "r8", "r9", "r10", "cc", "memory"); +#else + uint128_t c; + uint64_t c0, c1, c2; + uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7]; + uint64_t m0, m1, m2, m3, m4, m5; + uint32_t m6; + uint64_t p0, p1, p2, p3; + uint32_t p4; + + /* Reduce 512 bits into 385. */ + /* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. 
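+       This folds the upper half down because 2^256 is congruent to
+       SECP256K1_N_C modulo the group order, so the high limbs n[0..3] can be
+       multiplied by SECP256K1_N_C instead of by 2^256.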
*/ + c0 = l[0]; c1 = 0; c2 = 0; + muladd_fast(n0, SECP256K1_N_C_0); + extract_fast(m0); + sumadd_fast(l[1]); + muladd(n1, SECP256K1_N_C_0); + muladd(n0, SECP256K1_N_C_1); + extract(m1); + sumadd(l[2]); + muladd(n2, SECP256K1_N_C_0); + muladd(n1, SECP256K1_N_C_1); + sumadd(n0); + extract(m2); + sumadd(l[3]); + muladd(n3, SECP256K1_N_C_0); + muladd(n2, SECP256K1_N_C_1); + sumadd(n1); + extract(m3); + muladd(n3, SECP256K1_N_C_1); + sumadd(n2); + extract(m4); + sumadd_fast(n3); + extract_fast(m5); + VERIFY_CHECK(c0 <= 1); + m6 = c0; + + /* Reduce 385 bits into 258. */ + /* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */ + c0 = m0; c1 = 0; c2 = 0; + muladd_fast(m4, SECP256K1_N_C_0); + extract_fast(p0); + sumadd_fast(m1); + muladd(m5, SECP256K1_N_C_0); + muladd(m4, SECP256K1_N_C_1); + extract(p1); + sumadd(m2); + muladd(m6, SECP256K1_N_C_0); + muladd(m5, SECP256K1_N_C_1); + sumadd(m4); + extract(p2); + sumadd_fast(m3); + muladd_fast(m6, SECP256K1_N_C_1); + sumadd_fast(m5); + extract_fast(p3); + p4 = c0 + m6; + VERIFY_CHECK(p4 <= 2); + + /* Reduce 258 bits into 256. */ + /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */ + c = p0 + (uint128_t)SECP256K1_N_C_0 * p4; + r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; + c += p1 + (uint128_t)SECP256K1_N_C_1 * p4; + r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; + c += p2 + (uint128_t)p4; + r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; + c += p3; + r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; +#endif + + /* Final reduction of r. */ + secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r)); +} + +static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { +#ifdef USE_ASM_X86_64 + const uint64_t *pb = b->d; + __asm__ __volatile__( + /* Preload */ + "movq 0(%%rdi), %%r15\n" + "movq 8(%%rdi), %%rbx\n" + "movq 16(%%rdi), %%rcx\n" + "movq 0(%%rdx), %%r11\n" + "movq 8(%%rdx), %%r12\n" + "movq 16(%%rdx), %%r13\n" + "movq 24(%%rdx), %%r14\n" + /* (rax,rdx) = a0 * b0 */ + "movq %%r15, %%rax\n" + "mulq %%r11\n" + /* Extract l0 */ + "movq %%rax, 0(%%rsi)\n" + /* (r8,r9,r10) = (rdx) */ + "movq %%rdx, %%r8\n" + "xorq %%r9, %%r9\n" + "xorq %%r10, %%r10\n" + /* (r8,r9,r10) += a0 * b1 */ + "movq %%r15, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* (r8,r9,r10) += a1 * b0 */ + "movq %%rbx, %%rax\n" + "mulq %%r11\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* Extract l1 */ + "movq %%r8, 8(%%rsi)\n" + "xorq %%r8, %%r8\n" + /* (r9,r10,r8) += a0 * b2 */ + "movq %%r15, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* (r9,r10,r8) += a1 * b1 */ + "movq %%rbx, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* (r9,r10,r8) += a2 * b0 */ + "movq %%rcx, %%rax\n" + "mulq %%r11\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* Extract l2 */ + "movq %%r9, 16(%%rsi)\n" + "xorq %%r9, %%r9\n" + /* (r10,r8,r9) += a0 * b3 */ + "movq %%r15, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* Preload a3 */ + "movq 24(%%rdi), %%r15\n" + /* (r10,r8,r9) += a1 * b2 */ + "movq %%rbx, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* (r10,r8,r9) += a2 * b1 */ + "movq %%rcx, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* (r10,r8,r9) += a3 * b0 */ + "movq %%r15, %%rax\n" + "mulq %%r11\n" + "addq 
%%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* Extract l3 */ + "movq %%r10, 24(%%rsi)\n" + "xorq %%r10, %%r10\n" + /* (r8,r9,r10) += a1 * b3 */ + "movq %%rbx, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* (r8,r9,r10) += a2 * b2 */ + "movq %%rcx, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* (r8,r9,r10) += a3 * b1 */ + "movq %%r15, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* Extract l4 */ + "movq %%r8, 32(%%rsi)\n" + "xorq %%r8, %%r8\n" + /* (r9,r10,r8) += a2 * b3 */ + "movq %%rcx, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* (r9,r10,r8) += a3 * b2 */ + "movq %%r15, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* Extract l5 */ + "movq %%r9, 40(%%rsi)\n" + /* (r10,r8) += a3 * b3 */ + "movq %%r15, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + /* Extract l6 */ + "movq %%r10, 48(%%rsi)\n" + /* Extract l7 */ + "movq %%r8, 56(%%rsi)\n" + : "+d"(pb) + : "S"(l), "D"(a->d) + : "rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc", "memory"); +#else + /* 160 bit accumulator. */ + uint64_t c0 = 0, c1 = 0; + uint32_t c2 = 0; + + /* l[0..7] = a[0..3] * b[0..3]. */ + muladd_fast(a->d[0], b->d[0]); + extract_fast(l[0]); + muladd(a->d[0], b->d[1]); + muladd(a->d[1], b->d[0]); + extract(l[1]); + muladd(a->d[0], b->d[2]); + muladd(a->d[1], b->d[1]); + muladd(a->d[2], b->d[0]); + extract(l[2]); + muladd(a->d[0], b->d[3]); + muladd(a->d[1], b->d[2]); + muladd(a->d[2], b->d[1]); + muladd(a->d[3], b->d[0]); + extract(l[3]); + muladd(a->d[1], b->d[3]); + muladd(a->d[2], b->d[2]); + muladd(a->d[3], b->d[1]); + extract(l[4]); + muladd(a->d[2], b->d[3]); + muladd(a->d[3], b->d[2]); + extract(l[5]); + muladd_fast(a->d[3], b->d[3]); + extract_fast(l[6]); + VERIFY_CHECK(c1 <= 0); + l[7] = c0; +#endif +} + +static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar_t *a) { +#ifdef USE_ASM_X86_64 + __asm__ __volatile__( + /* Preload */ + "movq 0(%%rdi), %%r11\n" + "movq 8(%%rdi), %%r12\n" + "movq 16(%%rdi), %%r13\n" + "movq 24(%%rdi), %%r14\n" + /* (rax,rdx) = a0 * a0 */ + "movq %%r11, %%rax\n" + "mulq %%r11\n" + /* Extract l0 */ + "movq %%rax, 0(%%rsi)\n" + /* (r8,r9,r10) = (rdx,0) */ + "movq %%rdx, %%r8\n" + "xorq %%r9, %%r9\n" + "xorq %%r10, %%r10\n" + /* (r8,r9,r10) += 2 * a0 * a1 */ + "movq %%r11, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* Extract l1 */ + "movq %%r8, 8(%%rsi)\n" + "xorq %%r8, %%r8\n" + /* (r9,r10,r8) += 2 * a0 * a2 */ + "movq %%r11, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* (r9,r10,r8) += a1 * a1 */ + "movq %%r12, %%rax\n" + "mulq %%r12\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* Extract l2 */ + "movq %%r9, 16(%%rsi)\n" + "xorq %%r9, %%r9\n" + /* (r10,r8,r9) += 2 * a0 * a3 */ + "movq %%r11, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* (r10,r8,r9) += 2 * a1 * a2 */ + "movq %%r12, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, 
%%r9\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + "adcq $0, %%r9\n" + /* Extract l3 */ + "movq %%r10, 24(%%rsi)\n" + "xorq %%r10, %%r10\n" + /* (r8,r9,r10) += 2 * a1 * a3 */ + "movq %%r12, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* (r8,r9,r10) += a2 * a2 */ + "movq %%r13, %%rax\n" + "mulq %%r13\n" + "addq %%rax, %%r8\n" + "adcq %%rdx, %%r9\n" + "adcq $0, %%r10\n" + /* Extract l4 */ + "movq %%r8, 32(%%rsi)\n" + "xorq %%r8, %%r8\n" + /* (r9,r10,r8) += 2 * a2 * a3 */ + "movq %%r13, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + "addq %%rax, %%r9\n" + "adcq %%rdx, %%r10\n" + "adcq $0, %%r8\n" + /* Extract l5 */ + "movq %%r9, 40(%%rsi)\n" + /* (r10,r8) += a3 * a3 */ + "movq %%r14, %%rax\n" + "mulq %%r14\n" + "addq %%rax, %%r10\n" + "adcq %%rdx, %%r8\n" + /* Extract l6 */ + "movq %%r10, 48(%%rsi)\n" + /* Extract l7 */ + "movq %%r8, 56(%%rsi)\n" + : + : "S"(l), "D"(a->d) + : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc", "memory"); +#else + /* 160 bit accumulator. */ + uint64_t c0 = 0, c1 = 0; + uint32_t c2 = 0; + + /* l[0..7] = a[0..3] * b[0..3]. */ + muladd_fast(a->d[0], a->d[0]); + extract_fast(l[0]); + muladd2(a->d[0], a->d[1]); + extract(l[1]); + muladd2(a->d[0], a->d[2]); + muladd(a->d[1], a->d[1]); + extract(l[2]); + muladd2(a->d[0], a->d[3]); + muladd2(a->d[1], a->d[2]); + extract(l[3]); + muladd2(a->d[1], a->d[3]); + muladd(a->d[2], a->d[2]); + extract(l[4]); + muladd2(a->d[2], a->d[3]); + extract(l[5]); + muladd_fast(a->d[3], a->d[3]); + extract_fast(l[6]); + VERIFY_CHECK(c1 == 0); + l[7] = c0; +#endif +} + +#undef sumadd +#undef sumadd_fast +#undef muladd +#undef muladd_fast +#undef muladd2 +#undef extract +#undef extract_fast + +static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { + uint64_t l[8]; + secp256k1_scalar_mul_512(l, a, b); + secp256k1_scalar_reduce_512(r, l); +} + +static int secp256k1_scalar_shr_int(secp256k1_scalar_t *r, int n) { + int ret; + VERIFY_CHECK(n > 0); + VERIFY_CHECK(n < 16); + ret = r->d[0] & ((1 << n) - 1); + r->d[0] = (r->d[0] >> n) + (r->d[1] << (64 - n)); + r->d[1] = (r->d[1] >> n) + (r->d[2] << (64 - n)); + r->d[2] = (r->d[2] >> n) + (r->d[3] << (64 - n)); + r->d[3] = (r->d[3] >> n); + return ret; +} + +static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) { + uint64_t l[8]; + secp256k1_scalar_sqr_512(l, a); + secp256k1_scalar_reduce_512(r, l); +} + +static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) { + r1->d[0] = a->d[0]; + r1->d[1] = a->d[1]; + r1->d[2] = 0; + r1->d[3] = 0; + r2->d[0] = a->d[2]; + r2->d[1] = a->d[3]; + r2->d[2] = 0; + r2->d[3] = 0; +} + +SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { + return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3])) == 0; +} + +SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift) { + uint64_t l[8]; + unsigned int shiftlimbs; + unsigned int shiftlow; + unsigned int shifthigh; + VERIFY_CHECK(shift >= 256); + secp256k1_scalar_mul_512(l, a, b); + shiftlimbs = shift >> 6; + shiftlow = shift & 0x3F; + shifthigh = 64 - shiftlow; + r->d[0] = shift < 512 ? 
(l[0 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[3] = shift < 320 ? (l[3 + shiftlimbs] >> shiftlow) : 0; + secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1); +} + +#endif diff --git a/bf/secp256k1/src/scalar_8x32.h b/bf/secp256k1/src/scalar_8x32.h new file mode 100644 index 0000000..f17017e --- /dev/null +++ b/bf/secp256k1/src/scalar_8x32.h @@ -0,0 +1,19 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_SCALAR_REPR_ +#define _SECP256K1_SCALAR_REPR_ + +#include + +/** A scalar modulo the group order of the secp256k1 curve. */ +typedef struct { + uint32_t d[8]; +} secp256k1_scalar_t; + +#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{(d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7)}} + +#endif diff --git a/bf/secp256k1/src/scalar_8x32_impl.h b/bf/secp256k1/src/scalar_8x32_impl.h new file mode 100644 index 0000000..54923eb --- /dev/null +++ b/bf/secp256k1/src/scalar_8x32_impl.h @@ -0,0 +1,721 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_SCALAR_REPR_IMPL_H_ +#define _SECP256K1_SCALAR_REPR_IMPL_H_ + +/* Limbs of the secp256k1 order. */ +#define SECP256K1_N_0 ((uint32_t)0xD0364141UL) +#define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL) +#define SECP256K1_N_2 ((uint32_t)0xAF48A03BUL) +#define SECP256K1_N_3 ((uint32_t)0xBAAEDCE6UL) +#define SECP256K1_N_4 ((uint32_t)0xFFFFFFFEUL) +#define SECP256K1_N_5 ((uint32_t)0xFFFFFFFFUL) +#define SECP256K1_N_6 ((uint32_t)0xFFFFFFFFUL) +#define SECP256K1_N_7 ((uint32_t)0xFFFFFFFFUL) + +/* Limbs of 2^256 minus the secp256k1 order. */ +#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1) +#define SECP256K1_N_C_1 (~SECP256K1_N_1) +#define SECP256K1_N_C_2 (~SECP256K1_N_2) +#define SECP256K1_N_C_3 (~SECP256K1_N_3) +#define SECP256K1_N_C_4 (1) + +/* Limbs of half the secp256k1 order. 
*/ +#define SECP256K1_N_H_0 ((uint32_t)0x681B20A0UL) +#define SECP256K1_N_H_1 ((uint32_t)0xDFE92F46UL) +#define SECP256K1_N_H_2 ((uint32_t)0x57A4501DUL) +#define SECP256K1_N_H_3 ((uint32_t)0x5D576E73UL) +#define SECP256K1_N_H_4 ((uint32_t)0xFFFFFFFFUL) +#define SECP256K1_N_H_5 ((uint32_t)0xFFFFFFFFUL) +#define SECP256K1_N_H_6 ((uint32_t)0xFFFFFFFFUL) +#define SECP256K1_N_H_7 ((uint32_t)0x7FFFFFFFUL) + +SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar_t *r) { + r->d[0] = 0; + r->d[1] = 0; + r->d[2] = 0; + r->d[3] = 0; + r->d[4] = 0; + r->d[5] = 0; + r->d[6] = 0; + r->d[7] = 0; +} + +SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar_t *r, unsigned int v) { + r->d[0] = v; + r->d[1] = 0; + r->d[2] = 0; + r->d[3] = 0; + r->d[4] = 0; + r->d[5] = 0; + r->d[6] = 0; + r->d[7] = 0; +} + +SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) { + VERIFY_CHECK((offset + count - 1) >> 5 == offset >> 5); + return (a->d[offset >> 5] >> (offset & 0x1F)) & ((1 << count) - 1); +} + +SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) { + VERIFY_CHECK(count < 32); + VERIFY_CHECK(offset + count <= 256); + if ((offset + count - 1) >> 5 == offset >> 5) { + return secp256k1_scalar_get_bits(a, offset, count); + } else { + VERIFY_CHECK((offset >> 5) + 1 < 8); + return ((a->d[offset >> 5] >> (offset & 0x1F)) | (a->d[(offset >> 5) + 1] << (32 - (offset & 0x1F)))) & ((((uint32_t)1) << count) - 1); + } +} + +SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) { + int yes = 0; + int no = 0; + no |= (a->d[7] < SECP256K1_N_7); /* No need for a > check. */ + no |= (a->d[6] < SECP256K1_N_6); /* No need for a > check. */ + no |= (a->d[5] < SECP256K1_N_5); /* No need for a > check. 
*/ + no |= (a->d[4] < SECP256K1_N_4); + yes |= (a->d[4] > SECP256K1_N_4) & ~no; + no |= (a->d[3] < SECP256K1_N_3) & ~yes; + yes |= (a->d[3] > SECP256K1_N_3) & ~no; + no |= (a->d[2] < SECP256K1_N_2) & ~yes; + yes |= (a->d[2] > SECP256K1_N_2) & ~no; + no |= (a->d[1] < SECP256K1_N_1) & ~yes; + yes |= (a->d[1] > SECP256K1_N_1) & ~no; + yes |= (a->d[0] >= SECP256K1_N_0) & ~no; + return yes; +} + +SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint32_t overflow) { + uint64_t t; + VERIFY_CHECK(overflow <= 1); + t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0; + r->d[0] = t & 0xFFFFFFFFUL; t >>= 32; + t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1; + r->d[1] = t & 0xFFFFFFFFUL; t >>= 32; + t += (uint64_t)r->d[2] + overflow * SECP256K1_N_C_2; + r->d[2] = t & 0xFFFFFFFFUL; t >>= 32; + t += (uint64_t)r->d[3] + overflow * SECP256K1_N_C_3; + r->d[3] = t & 0xFFFFFFFFUL; t >>= 32; + t += (uint64_t)r->d[4] + overflow * SECP256K1_N_C_4; + r->d[4] = t & 0xFFFFFFFFUL; t >>= 32; + t += (uint64_t)r->d[5]; + r->d[5] = t & 0xFFFFFFFFUL; t >>= 32; + t += (uint64_t)r->d[6]; + r->d[6] = t & 0xFFFFFFFFUL; t >>= 32; + t += (uint64_t)r->d[7]; + r->d[7] = t & 0xFFFFFFFFUL; + return overflow; +} + +static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { + int overflow; + uint64_t t = (uint64_t)a->d[0] + b->d[0]; + r->d[0] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)a->d[1] + b->d[1]; + r->d[1] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)a->d[2] + b->d[2]; + r->d[2] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)a->d[3] + b->d[3]; + r->d[3] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)a->d[4] + b->d[4]; + r->d[4] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)a->d[5] + b->d[5]; + r->d[5] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)a->d[6] + b->d[6]; + r->d[6] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)a->d[7] + b->d[7]; + r->d[7] = t & 0xFFFFFFFFULL; t >>= 32; + overflow = t + secp256k1_scalar_check_overflow(r); + VERIFY_CHECK(overflow == 0 || overflow == 1); + secp256k1_scalar_reduce(r, overflow); + return overflow; +} + +static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag) { + uint64_t t; + VERIFY_CHECK(bit < 256); + bit += ((uint32_t) flag - 1) & 0x100; /* forcing (bit >> 5) > 7 makes this a noop */ + t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F)); + r->d[0] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F)); + r->d[1] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)r->d[2] + (((uint32_t)((bit >> 5) == 2)) << (bit & 0x1F)); + r->d[2] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)r->d[3] + (((uint32_t)((bit >> 5) == 3)) << (bit & 0x1F)); + r->d[3] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)r->d[4] + (((uint32_t)((bit >> 5) == 4)) << (bit & 0x1F)); + r->d[4] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)r->d[5] + (((uint32_t)((bit >> 5) == 5)) << (bit & 0x1F)); + r->d[5] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)r->d[6] + (((uint32_t)((bit >> 5) == 6)) << (bit & 0x1F)); + r->d[6] = t & 0xFFFFFFFFULL; t >>= 32; + t += (uint64_t)r->d[7] + (((uint32_t)((bit >> 5) == 7)) << (bit & 0x1F)); + r->d[7] = t & 0xFFFFFFFFULL; +#ifdef VERIFY + VERIFY_CHECK((t >> 32) == 0); + VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0); +#endif +} + +static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) { + int over; + r->d[0] = (uint32_t)b32[31] | 
(uint32_t)b32[30] << 8 | (uint32_t)b32[29] << 16 | (uint32_t)b32[28] << 24; + r->d[1] = (uint32_t)b32[27] | (uint32_t)b32[26] << 8 | (uint32_t)b32[25] << 16 | (uint32_t)b32[24] << 24; + r->d[2] = (uint32_t)b32[23] | (uint32_t)b32[22] << 8 | (uint32_t)b32[21] << 16 | (uint32_t)b32[20] << 24; + r->d[3] = (uint32_t)b32[19] | (uint32_t)b32[18] << 8 | (uint32_t)b32[17] << 16 | (uint32_t)b32[16] << 24; + r->d[4] = (uint32_t)b32[15] | (uint32_t)b32[14] << 8 | (uint32_t)b32[13] << 16 | (uint32_t)b32[12] << 24; + r->d[5] = (uint32_t)b32[11] | (uint32_t)b32[10] << 8 | (uint32_t)b32[9] << 16 | (uint32_t)b32[8] << 24; + r->d[6] = (uint32_t)b32[7] | (uint32_t)b32[6] << 8 | (uint32_t)b32[5] << 16 | (uint32_t)b32[4] << 24; + r->d[7] = (uint32_t)b32[3] | (uint32_t)b32[2] << 8 | (uint32_t)b32[1] << 16 | (uint32_t)b32[0] << 24; + over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r)); + if (overflow) { + *overflow = over; + } +} + +static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a) { + bin[0] = a->d[7] >> 24; bin[1] = a->d[7] >> 16; bin[2] = a->d[7] >> 8; bin[3] = a->d[7]; + bin[4] = a->d[6] >> 24; bin[5] = a->d[6] >> 16; bin[6] = a->d[6] >> 8; bin[7] = a->d[6]; + bin[8] = a->d[5] >> 24; bin[9] = a->d[5] >> 16; bin[10] = a->d[5] >> 8; bin[11] = a->d[5]; + bin[12] = a->d[4] >> 24; bin[13] = a->d[4] >> 16; bin[14] = a->d[4] >> 8; bin[15] = a->d[4]; + bin[16] = a->d[3] >> 24; bin[17] = a->d[3] >> 16; bin[18] = a->d[3] >> 8; bin[19] = a->d[3]; + bin[20] = a->d[2] >> 24; bin[21] = a->d[2] >> 16; bin[22] = a->d[2] >> 8; bin[23] = a->d[2]; + bin[24] = a->d[1] >> 24; bin[25] = a->d[1] >> 16; bin[26] = a->d[1] >> 8; bin[27] = a->d[1]; + bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0]; +} + +SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a) { + return (a->d[0] | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0; +} + +static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) { + uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(a) == 0); + uint64_t t = (uint64_t)(~a->d[0]) + SECP256K1_N_0 + 1; + r->d[0] = t & nonzero; t >>= 32; + t += (uint64_t)(~a->d[1]) + SECP256K1_N_1; + r->d[1] = t & nonzero; t >>= 32; + t += (uint64_t)(~a->d[2]) + SECP256K1_N_2; + r->d[2] = t & nonzero; t >>= 32; + t += (uint64_t)(~a->d[3]) + SECP256K1_N_3; + r->d[3] = t & nonzero; t >>= 32; + t += (uint64_t)(~a->d[4]) + SECP256K1_N_4; + r->d[4] = t & nonzero; t >>= 32; + t += (uint64_t)(~a->d[5]) + SECP256K1_N_5; + r->d[5] = t & nonzero; t >>= 32; + t += (uint64_t)(~a->d[6]) + SECP256K1_N_6; + r->d[6] = t & nonzero; t >>= 32; + t += (uint64_t)(~a->d[7]) + SECP256K1_N_7; + r->d[7] = t & nonzero; +} + +SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a) { + return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0; +} + +static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) { + int yes = 0; + int no = 0; + no |= (a->d[7] < SECP256K1_N_H_7); + yes |= (a->d[7] > SECP256K1_N_H_7) & ~no; + no |= (a->d[6] < SECP256K1_N_H_6) & ~yes; /* No need for a > check. */ + no |= (a->d[5] < SECP256K1_N_H_5) & ~yes; /* No need for a > check. */ + no |= (a->d[4] < SECP256K1_N_H_4) & ~yes; /* No need for a > check. 
*/ + no |= (a->d[3] < SECP256K1_N_H_3) & ~yes; + yes |= (a->d[3] > SECP256K1_N_H_3) & ~no; + no |= (a->d[2] < SECP256K1_N_H_2) & ~yes; + yes |= (a->d[2] > SECP256K1_N_H_2) & ~no; + no |= (a->d[1] < SECP256K1_N_H_1) & ~yes; + yes |= (a->d[1] > SECP256K1_N_H_1) & ~no; + yes |= (a->d[0] > SECP256K1_N_H_0) & ~no; + return yes; +} + +static int secp256k1_scalar_cond_negate(secp256k1_scalar_t *r, int flag) { + /* If we are flag = 0, mask = 00...00 and this is a no-op; + * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */ + uint32_t mask = !flag - 1; + uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(r) == 0); + uint64_t t = (uint64_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask); + r->d[0] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask); + r->d[1] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask); + r->d[2] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask); + r->d[3] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[4] ^ mask) + (SECP256K1_N_4 & mask); + r->d[4] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[5] ^ mask) + (SECP256K1_N_5 & mask); + r->d[5] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[6] ^ mask) + (SECP256K1_N_6 & mask); + r->d[6] = t & nonzero; t >>= 32; + t += (uint64_t)(r->d[7] ^ mask) + (SECP256K1_N_7 & mask); + r->d[7] = t & nonzero; + return 2 * (mask == 0) - 1; +} + + +/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */ + +/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */ +#define muladd(a,b) { \ + uint32_t tl, th; \ + { \ + uint64_t t = (uint64_t)a * b; \ + th = t >> 32; /* at most 0xFFFFFFFE */ \ + tl = t; \ + } \ + c0 += tl; /* overflow is handled on the next line */ \ + th += (c0 < tl) ? 1 : 0; /* at most 0xFFFFFFFF */ \ + c1 += th; /* overflow is handled on the next line */ \ + c2 += (c1 < th) ? 1 : 0; /* never overflows by contract (verified in the next line) */ \ + VERIFY_CHECK((c1 >= th) || (c2 != 0)); \ +} + +/** Add a*b to the number defined by (c0,c1). c1 must never overflow. */ +#define muladd_fast(a,b) { \ + uint32_t tl, th; \ + { \ + uint64_t t = (uint64_t)a * b; \ + th = t >> 32; /* at most 0xFFFFFFFE */ \ + tl = t; \ + } \ + c0 += tl; /* overflow is handled on the next line */ \ + th += (c0 < tl) ? 1 : 0; /* at most 0xFFFFFFFF */ \ + c1 += th; /* never overflows by contract (verified in the next line) */ \ + VERIFY_CHECK(c1 >= th); \ +} + +/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */ +#define muladd2(a,b) { \ + uint32_t tl, th, th2, tl2; \ + { \ + uint64_t t = (uint64_t)a * b; \ + th = t >> 32; /* at most 0xFFFFFFFE */ \ + tl = t; \ + } \ + th2 = th + th; /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \ + c2 += (th2 < th) ? 1 : 0; /* never overflows by contract (verified the next line) */ \ + VERIFY_CHECK((th2 >= th) || (c2 != 0)); \ + tl2 = tl + tl; /* at most 0xFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFF) */ \ + th2 += (tl2 < tl) ? 1 : 0; /* at most 0xFFFFFFFF */ \ + c0 += tl2; /* overflow is handled on the next line */ \ + th2 += (c0 < tl2) ? 1 : 0; /* second overflow is handled on the next line */ \ + c2 += (c0 < tl2) & (th2 == 0); /* never overflows by contract (verified the next line) */ \ + VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \ + c1 += th2; /* overflow is handled on the next line */ \ + c2 += (c1 < th2) ? 
1 : 0; /* never overflows by contract (verified the next line) */ \ + VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \ +} + +/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */ +#define sumadd(a) { \ + unsigned int over; \ + c0 += (a); /* overflow is handled on the next line */ \ + over = (c0 < (a)) ? 1 : 0; \ + c1 += over; /* overflow is handled on the next line */ \ + c2 += (c1 < over) ? 1 : 0; /* never overflows by contract */ \ +} + +/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */ +#define sumadd_fast(a) { \ + c0 += (a); /* overflow is handled on the next line */ \ + c1 += (c0 < (a)) ? 1 : 0; /* never overflows by contract (verified the next line) */ \ + VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \ + VERIFY_CHECK(c2 == 0); \ +} + +/** Extract the lowest 32 bits of (c0,c1,c2) into n, and left shift the number 32 bits. */ +#define extract(n) { \ + (n) = c0; \ + c0 = c1; \ + c1 = c2; \ + c2 = 0; \ +} + +/** Extract the lowest 32 bits of (c0,c1,c2) into n, and left shift the number 32 bits. c2 is required to be zero. */ +#define extract_fast(n) { \ + (n) = c0; \ + c0 = c1; \ + c1 = 0; \ + VERIFY_CHECK(c2 == 0); \ +} + +static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l) { + uint64_t c; + uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15]; + uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12; + uint32_t p0, p1, p2, p3, p4, p5, p6, p7, p8; + + /* 96 bit accumulator. */ + uint32_t c0, c1, c2; + + /* Reduce 512 bits into 385. */ + /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */ + c0 = l[0]; c1 = 0; c2 = 0; + muladd_fast(n0, SECP256K1_N_C_0); + extract_fast(m0); + sumadd_fast(l[1]); + muladd(n1, SECP256K1_N_C_0); + muladd(n0, SECP256K1_N_C_1); + extract(m1); + sumadd(l[2]); + muladd(n2, SECP256K1_N_C_0); + muladd(n1, SECP256K1_N_C_1); + muladd(n0, SECP256K1_N_C_2); + extract(m2); + sumadd(l[3]); + muladd(n3, SECP256K1_N_C_0); + muladd(n2, SECP256K1_N_C_1); + muladd(n1, SECP256K1_N_C_2); + muladd(n0, SECP256K1_N_C_3); + extract(m3); + sumadd(l[4]); + muladd(n4, SECP256K1_N_C_0); + muladd(n3, SECP256K1_N_C_1); + muladd(n2, SECP256K1_N_C_2); + muladd(n1, SECP256K1_N_C_3); + sumadd(n0); + extract(m4); + sumadd(l[5]); + muladd(n5, SECP256K1_N_C_0); + muladd(n4, SECP256K1_N_C_1); + muladd(n3, SECP256K1_N_C_2); + muladd(n2, SECP256K1_N_C_3); + sumadd(n1); + extract(m5); + sumadd(l[6]); + muladd(n6, SECP256K1_N_C_0); + muladd(n5, SECP256K1_N_C_1); + muladd(n4, SECP256K1_N_C_2); + muladd(n3, SECP256K1_N_C_3); + sumadd(n2); + extract(m6); + sumadd(l[7]); + muladd(n7, SECP256K1_N_C_0); + muladd(n6, SECP256K1_N_C_1); + muladd(n5, SECP256K1_N_C_2); + muladd(n4, SECP256K1_N_C_3); + sumadd(n3); + extract(m7); + muladd(n7, SECP256K1_N_C_1); + muladd(n6, SECP256K1_N_C_2); + muladd(n5, SECP256K1_N_C_3); + sumadd(n4); + extract(m8); + muladd(n7, SECP256K1_N_C_2); + muladd(n6, SECP256K1_N_C_3); + sumadd(n5); + extract(m9); + muladd(n7, SECP256K1_N_C_3); + sumadd(n6); + extract(m10); + sumadd_fast(n7); + extract_fast(m11); + VERIFY_CHECK(c0 <= 1); + m12 = c0; + + /* Reduce 385 bits into 258. */ + /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. 
*/ + c0 = m0; c1 = 0; c2 = 0; + muladd_fast(m8, SECP256K1_N_C_0); + extract_fast(p0); + sumadd_fast(m1); + muladd(m9, SECP256K1_N_C_0); + muladd(m8, SECP256K1_N_C_1); + extract(p1); + sumadd(m2); + muladd(m10, SECP256K1_N_C_0); + muladd(m9, SECP256K1_N_C_1); + muladd(m8, SECP256K1_N_C_2); + extract(p2); + sumadd(m3); + muladd(m11, SECP256K1_N_C_0); + muladd(m10, SECP256K1_N_C_1); + muladd(m9, SECP256K1_N_C_2); + muladd(m8, SECP256K1_N_C_3); + extract(p3); + sumadd(m4); + muladd(m12, SECP256K1_N_C_0); + muladd(m11, SECP256K1_N_C_1); + muladd(m10, SECP256K1_N_C_2); + muladd(m9, SECP256K1_N_C_3); + sumadd(m8); + extract(p4); + sumadd(m5); + muladd(m12, SECP256K1_N_C_1); + muladd(m11, SECP256K1_N_C_2); + muladd(m10, SECP256K1_N_C_3); + sumadd(m9); + extract(p5); + sumadd(m6); + muladd(m12, SECP256K1_N_C_2); + muladd(m11, SECP256K1_N_C_3); + sumadd(m10); + extract(p6); + sumadd_fast(m7); + muladd_fast(m12, SECP256K1_N_C_3); + sumadd_fast(m11); + extract_fast(p7); + p8 = c0 + m12; + VERIFY_CHECK(p8 <= 2); + + /* Reduce 258 bits into 256. */ + /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */ + c = p0 + (uint64_t)SECP256K1_N_C_0 * p8; + r->d[0] = c & 0xFFFFFFFFUL; c >>= 32; + c += p1 + (uint64_t)SECP256K1_N_C_1 * p8; + r->d[1] = c & 0xFFFFFFFFUL; c >>= 32; + c += p2 + (uint64_t)SECP256K1_N_C_2 * p8; + r->d[2] = c & 0xFFFFFFFFUL; c >>= 32; + c += p3 + (uint64_t)SECP256K1_N_C_3 * p8; + r->d[3] = c & 0xFFFFFFFFUL; c >>= 32; + c += p4 + (uint64_t)p8; + r->d[4] = c & 0xFFFFFFFFUL; c >>= 32; + c += p5; + r->d[5] = c & 0xFFFFFFFFUL; c >>= 32; + c += p6; + r->d[6] = c & 0xFFFFFFFFUL; c >>= 32; + c += p7; + r->d[7] = c & 0xFFFFFFFFUL; c >>= 32; + + /* Final reduction of r. */ + secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r)); +} + +static void secp256k1_scalar_mul_512(uint32_t *l, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { + /* 96 bit accumulator. */ + uint32_t c0 = 0, c1 = 0, c2 = 0; + + /* l[0..15] = a[0..7] * b[0..7]. 
*/ + muladd_fast(a->d[0], b->d[0]); + extract_fast(l[0]); + muladd(a->d[0], b->d[1]); + muladd(a->d[1], b->d[0]); + extract(l[1]); + muladd(a->d[0], b->d[2]); + muladd(a->d[1], b->d[1]); + muladd(a->d[2], b->d[0]); + extract(l[2]); + muladd(a->d[0], b->d[3]); + muladd(a->d[1], b->d[2]); + muladd(a->d[2], b->d[1]); + muladd(a->d[3], b->d[0]); + extract(l[3]); + muladd(a->d[0], b->d[4]); + muladd(a->d[1], b->d[3]); + muladd(a->d[2], b->d[2]); + muladd(a->d[3], b->d[1]); + muladd(a->d[4], b->d[0]); + extract(l[4]); + muladd(a->d[0], b->d[5]); + muladd(a->d[1], b->d[4]); + muladd(a->d[2], b->d[3]); + muladd(a->d[3], b->d[2]); + muladd(a->d[4], b->d[1]); + muladd(a->d[5], b->d[0]); + extract(l[5]); + muladd(a->d[0], b->d[6]); + muladd(a->d[1], b->d[5]); + muladd(a->d[2], b->d[4]); + muladd(a->d[3], b->d[3]); + muladd(a->d[4], b->d[2]); + muladd(a->d[5], b->d[1]); + muladd(a->d[6], b->d[0]); + extract(l[6]); + muladd(a->d[0], b->d[7]); + muladd(a->d[1], b->d[6]); + muladd(a->d[2], b->d[5]); + muladd(a->d[3], b->d[4]); + muladd(a->d[4], b->d[3]); + muladd(a->d[5], b->d[2]); + muladd(a->d[6], b->d[1]); + muladd(a->d[7], b->d[0]); + extract(l[7]); + muladd(a->d[1], b->d[7]); + muladd(a->d[2], b->d[6]); + muladd(a->d[3], b->d[5]); + muladd(a->d[4], b->d[4]); + muladd(a->d[5], b->d[3]); + muladd(a->d[6], b->d[2]); + muladd(a->d[7], b->d[1]); + extract(l[8]); + muladd(a->d[2], b->d[7]); + muladd(a->d[3], b->d[6]); + muladd(a->d[4], b->d[5]); + muladd(a->d[5], b->d[4]); + muladd(a->d[6], b->d[3]); + muladd(a->d[7], b->d[2]); + extract(l[9]); + muladd(a->d[3], b->d[7]); + muladd(a->d[4], b->d[6]); + muladd(a->d[5], b->d[5]); + muladd(a->d[6], b->d[4]); + muladd(a->d[7], b->d[3]); + extract(l[10]); + muladd(a->d[4], b->d[7]); + muladd(a->d[5], b->d[6]); + muladd(a->d[6], b->d[5]); + muladd(a->d[7], b->d[4]); + extract(l[11]); + muladd(a->d[5], b->d[7]); + muladd(a->d[6], b->d[6]); + muladd(a->d[7], b->d[5]); + extract(l[12]); + muladd(a->d[6], b->d[7]); + muladd(a->d[7], b->d[6]); + extract(l[13]); + muladd_fast(a->d[7], b->d[7]); + extract_fast(l[14]); + VERIFY_CHECK(c1 == 0); + l[15] = c0; +} + +static void secp256k1_scalar_sqr_512(uint32_t *l, const secp256k1_scalar_t *a) { + /* 96 bit accumulator. */ + uint32_t c0 = 0, c1 = 0, c2 = 0; + + /* l[0..15] = a[0..7]^2. 
*/ + muladd_fast(a->d[0], a->d[0]); + extract_fast(l[0]); + muladd2(a->d[0], a->d[1]); + extract(l[1]); + muladd2(a->d[0], a->d[2]); + muladd(a->d[1], a->d[1]); + extract(l[2]); + muladd2(a->d[0], a->d[3]); + muladd2(a->d[1], a->d[2]); + extract(l[3]); + muladd2(a->d[0], a->d[4]); + muladd2(a->d[1], a->d[3]); + muladd(a->d[2], a->d[2]); + extract(l[4]); + muladd2(a->d[0], a->d[5]); + muladd2(a->d[1], a->d[4]); + muladd2(a->d[2], a->d[3]); + extract(l[5]); + muladd2(a->d[0], a->d[6]); + muladd2(a->d[1], a->d[5]); + muladd2(a->d[2], a->d[4]); + muladd(a->d[3], a->d[3]); + extract(l[6]); + muladd2(a->d[0], a->d[7]); + muladd2(a->d[1], a->d[6]); + muladd2(a->d[2], a->d[5]); + muladd2(a->d[3], a->d[4]); + extract(l[7]); + muladd2(a->d[1], a->d[7]); + muladd2(a->d[2], a->d[6]); + muladd2(a->d[3], a->d[5]); + muladd(a->d[4], a->d[4]); + extract(l[8]); + muladd2(a->d[2], a->d[7]); + muladd2(a->d[3], a->d[6]); + muladd2(a->d[4], a->d[5]); + extract(l[9]); + muladd2(a->d[3], a->d[7]); + muladd2(a->d[4], a->d[6]); + muladd(a->d[5], a->d[5]); + extract(l[10]); + muladd2(a->d[4], a->d[7]); + muladd2(a->d[5], a->d[6]); + extract(l[11]); + muladd2(a->d[5], a->d[7]); + muladd(a->d[6], a->d[6]); + extract(l[12]); + muladd2(a->d[6], a->d[7]); + extract(l[13]); + muladd_fast(a->d[7], a->d[7]); + extract_fast(l[14]); + VERIFY_CHECK(c1 == 0); + l[15] = c0; +} + +#undef sumadd +#undef sumadd_fast +#undef muladd +#undef muladd_fast +#undef muladd2 +#undef extract +#undef extract_fast + +static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { + uint32_t l[16]; + secp256k1_scalar_mul_512(l, a, b); + secp256k1_scalar_reduce_512(r, l); +} + +static int secp256k1_scalar_shr_int(secp256k1_scalar_t *r, int n) { + int ret; + VERIFY_CHECK(n > 0); + VERIFY_CHECK(n < 16); + ret = r->d[0] & ((1 << n) - 1); + r->d[0] = (r->d[0] >> n) + (r->d[1] << (32 - n)); + r->d[1] = (r->d[1] >> n) + (r->d[2] << (32 - n)); + r->d[2] = (r->d[2] >> n) + (r->d[3] << (32 - n)); + r->d[3] = (r->d[3] >> n) + (r->d[4] << (32 - n)); + r->d[4] = (r->d[4] >> n) + (r->d[5] << (32 - n)); + r->d[5] = (r->d[5] >> n) + (r->d[6] << (32 - n)); + r->d[6] = (r->d[6] >> n) + (r->d[7] << (32 - n)); + r->d[7] = (r->d[7] >> n); + return ret; +} + +static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) { + uint32_t l[16]; + secp256k1_scalar_sqr_512(l, a); + secp256k1_scalar_reduce_512(r, l); +} + +#ifdef USE_ENDOMORPHISM +static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) { + r1->d[0] = a->d[0]; + r1->d[1] = a->d[1]; + r1->d[2] = a->d[2]; + r1->d[3] = a->d[3]; + r1->d[4] = 0; + r1->d[5] = 0; + r1->d[6] = 0; + r1->d[7] = 0; + r2->d[0] = a->d[4]; + r2->d[1] = a->d[5]; + r2->d[2] = a->d[6]; + r2->d[3] = a->d[7]; + r2->d[4] = 0; + r2->d[5] = 0; + r2->d[6] = 0; + r2->d[7] = 0; +} +#endif + +SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { + return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3]) | (a->d[4] ^ b->d[4]) | (a->d[5] ^ b->d[5]) | (a->d[6] ^ b->d[6]) | (a->d[7] ^ b->d[7])) == 0; +} + +SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift) { + uint32_t l[16]; + unsigned int shiftlimbs; + unsigned int shiftlow; + unsigned int shifthigh; + VERIFY_CHECK(shift >= 256); + secp256k1_scalar_mul_512(l, a, b); + 
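+ /* From here on the function computes r = round((a*b) / 2^shift).
+  * The 512-bit product sits in l[0..15] as 32-bit limbs (little endian); shiftlimbs and
+  * shiftlow locate bit `shift`, each result limb is stitched together from the two source
+  * limbs that straddle it, and the final secp256k1_scalar_cadd_bit adds bit (shift - 1) of
+  * the product, rounding the truncated quotient to nearest. Callers pass shift >= 256
+  * (272 in secp256k1_scalar_split_lambda), so the result always fits in a scalar. */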
shiftlimbs = shift >> 5; + shiftlow = shift & 0x1F; + shifthigh = 32 - shiftlow; + r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 480 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[1] = shift < 480 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[2] = shift < 448 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 416 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[3] = shift < 416 ? (l[3 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[4 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[4] = shift < 384 ? (l[4 + shiftlimbs] >> shiftlow | (shift < 352 && shiftlow ? (l[5 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[5] = shift < 352 ? (l[5 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[6 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[6] = shift < 320 ? (l[6 + shiftlimbs] >> shiftlow | (shift < 288 && shiftlow ? (l[7 + shiftlimbs] << shifthigh) : 0)) : 0; + r->d[7] = shift < 288 ? (l[7 + shiftlimbs] >> shiftlow) : 0; + secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1); +} + +#endif diff --git a/bf/secp256k1/src/scalar_impl.h b/bf/secp256k1/src/scalar_impl.h new file mode 100644 index 0000000..425c0bf --- /dev/null +++ b/bf/secp256k1/src/scalar_impl.h @@ -0,0 +1,337 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_SCALAR_IMPL_H_ +#define _SECP256K1_SCALAR_IMPL_H_ + +#include + +#include "group.h" +#include "scalar.h" + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +#if defined(USE_SCALAR_4X64) +#include "scalar_4x64_impl.h" +#elif defined(USE_SCALAR_8X32) +#include "scalar_8x32_impl.h" +#else +#error "Please select scalar implementation" +#endif + +#ifndef USE_NUM_NONE +static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_t *a) { + unsigned char c[32]; + secp256k1_scalar_get_b32(c, a); + secp256k1_num_set_bin(r, c, 32); +} + +/** secp256k1 curve order, see secp256k1_ecdsa_const_order_as_fe in ecdsa_impl.h */ +static void secp256k1_scalar_order_get_num(secp256k1_num_t *r) { + static const unsigned char order[32] = { + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE, + 0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B, + 0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41 + }; + secp256k1_num_set_bin(r, order, 32); +} +#endif + +static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) { + secp256k1_scalar_t *t; + int i; + /* First compute x ^ (2^N - 1) for some values of N. 
*/ + secp256k1_scalar_t x2, x3, x4, x6, x7, x8, x15, x30, x60, x120, x127; + + secp256k1_scalar_sqr(&x2, x); + secp256k1_scalar_mul(&x2, &x2, x); + + secp256k1_scalar_sqr(&x3, &x2); + secp256k1_scalar_mul(&x3, &x3, x); + + secp256k1_scalar_sqr(&x4, &x3); + secp256k1_scalar_mul(&x4, &x4, x); + + secp256k1_scalar_sqr(&x6, &x4); + secp256k1_scalar_sqr(&x6, &x6); + secp256k1_scalar_mul(&x6, &x6, &x2); + + secp256k1_scalar_sqr(&x7, &x6); + secp256k1_scalar_mul(&x7, &x7, x); + + secp256k1_scalar_sqr(&x8, &x7); + secp256k1_scalar_mul(&x8, &x8, x); + + secp256k1_scalar_sqr(&x15, &x8); + for (i = 0; i < 6; i++) { + secp256k1_scalar_sqr(&x15, &x15); + } + secp256k1_scalar_mul(&x15, &x15, &x7); + + secp256k1_scalar_sqr(&x30, &x15); + for (i = 0; i < 14; i++) { + secp256k1_scalar_sqr(&x30, &x30); + } + secp256k1_scalar_mul(&x30, &x30, &x15); + + secp256k1_scalar_sqr(&x60, &x30); + for (i = 0; i < 29; i++) { + secp256k1_scalar_sqr(&x60, &x60); + } + secp256k1_scalar_mul(&x60, &x60, &x30); + + secp256k1_scalar_sqr(&x120, &x60); + for (i = 0; i < 59; i++) { + secp256k1_scalar_sqr(&x120, &x120); + } + secp256k1_scalar_mul(&x120, &x120, &x60); + + secp256k1_scalar_sqr(&x127, &x120); + for (i = 0; i < 6; i++) { + secp256k1_scalar_sqr(&x127, &x127); + } + secp256k1_scalar_mul(&x127, &x127, &x7); + + /* Then accumulate the final result (t starts at x127). */ + t = &x127; + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 4; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x3); /* 111 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 4; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x3); /* 111 */ + for (i = 0; i < 3; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x2); /* 11 */ + for (i = 0; i < 4; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x3); /* 111 */ + for (i = 0; i < 5; i++) { /* 00 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x3); /* 111 */ + for (i = 0; i < 4; i++) { /* 00 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x2); /* 11 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 5; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x4); /* 1111 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 3; i++) { /* 00 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 4; i++) { /* 000 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 10; i++) { /* 0000000 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x3); /* 111 */ + for (i = 0; i < 4; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x3); /* 111 */ + for (i = 0; i < 9; 
i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x8); /* 11111111 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 3; i++) { /* 00 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 3; i++) { /* 00 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 5; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x4); /* 1111 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 5; i++) { /* 000 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x2); /* 11 */ + for (i = 0; i < 4; i++) { /* 00 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x2); /* 11 */ + for (i = 0; i < 2; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 8; i++) { /* 000000 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x2); /* 11 */ + for (i = 0; i < 3; i++) { /* 0 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, &x2); /* 11 */ + for (i = 0; i < 3; i++) { /* 00 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 6; i++) { /* 00000 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(t, t, x); /* 1 */ + for (i = 0; i < 8; i++) { /* 00 */ + secp256k1_scalar_sqr(t, t); + } + secp256k1_scalar_mul(r, t, &x6); /* 111111 */ +} + +SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar_t *a) { + /* d[0] is present and is the lowest word for all representations */ + return !(a->d[0] & 1); +} + +static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) { +#if defined(USE_SCALAR_INV_BUILTIN) + secp256k1_scalar_inverse(r, x); +#elif defined(USE_SCALAR_INV_NUM) + unsigned char b[32]; + secp256k1_num_t n, m; + secp256k1_scalar_t t = *x; + secp256k1_scalar_get_b32(b, &t); + secp256k1_num_set_bin(&n, b, 32); + secp256k1_scalar_order_get_num(&m); + secp256k1_num_mod_inverse(&n, &n, &m); + secp256k1_num_get_bin(b, 32, &n); + secp256k1_scalar_set_b32(r, b, NULL); + /* Verify that the inverse was computed correctly, without GMP code. */ + secp256k1_scalar_mul(&t, &t, r); + CHECK(secp256k1_scalar_is_one(&t)); +#else +#error "Please select scalar inverse implementation" +#endif +} + +#ifdef USE_ENDOMORPHISM +/** + * The Secp256k1 curve has an endomorphism, where lambda * (x, y) = (beta * x, y), where + * lambda is {0x53,0x63,0xad,0x4c,0xc0,0x5c,0x30,0xe0,0xa5,0x26,0x1c,0x02,0x88,0x12,0x64,0x5a, + * 0x12,0x2e,0x22,0xea,0x20,0x81,0x66,0x78,0xdf,0x02,0x96,0x7c,0x1b,0x23,0xbd,0x72} + * + * "Guide to Elliptic Curve Cryptography" (Hankerson, Menezes, Vanstone) gives an algorithm + * (algorithm 3.74) to find k1 and k2 given k, such that k1 + k2 * lambda == k mod n, and k1 + * and k2 have a small size. + * It relies on constants a1, b1, a2, b2. 
These constants for the value of lambda above are: + * + * - a1 = {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15} + * - b1 = -{0xe4,0x43,0x7e,0xd6,0x01,0x0e,0x88,0x28,0x6f,0x54,0x7f,0xa9,0x0a,0xbf,0xe4,0xc3} + * - a2 = {0x01,0x14,0xca,0x50,0xf7,0xa8,0xe2,0xf3,0xf6,0x57,0xc1,0x10,0x8d,0x9d,0x44,0xcf,0xd8} + * - b2 = {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15} + * + * The algorithm then computes c1 = round(b1 * k / n) and c2 = round(b2 * k / n), and gives + * k1 = k - (c1*a1 + c2*a2) and k2 = -(c1*b1 + c2*b2). Instead, we use modular arithmetic, and + * compute k1 as k - k2 * lambda, avoiding the need for constants a1 and a2. + * + * g1, g2 are precomputed constants used to replace division with a rounded multiplication + * when decomposing the scalar for an endomorphism-based point multiplication. + * + * The possibility of using precomputed estimates is mentioned in "Guide to Elliptic Curve + * Cryptography" (Hankerson, Menezes, Vanstone) in section 3.5. + * + * The derivation is described in the paper "Efficient Software Implementation of Public-Key + * Cryptography on Sensor Networks Using the MSP430X Microcontroller" (Gouvea, Oliveira, Lopez), + * Section 4.3 (here we use a somewhat higher-precision estimate): + * d = a1*b2 - b1*a2 + * g1 = round((2^272)*b2/d) + * g2 = round((2^272)*b1/d) + * + * (Note that 'd' is also equal to the curve order here because [a1,b1] and [a2,b2] are found + * as outputs of the Extended Euclidean Algorithm on inputs 'order' and 'lambda'). + * + * The function below splits a in r1 and r2, such that r1 + lambda * r2 == a (mod order). + */ + +static void secp256k1_scalar_split_lambda(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) { + secp256k1_scalar_t c1, c2; + static const secp256k1_scalar_t minus_lambda = SECP256K1_SCALAR_CONST( + 0xAC9C52B3UL, 0x3FA3CF1FUL, 0x5AD9E3FDUL, 0x77ED9BA4UL, + 0xA880B9FCUL, 0x8EC739C2UL, 0xE0CFC810UL, 0xB51283CFUL + ); + static const secp256k1_scalar_t minus_b1 = SECP256K1_SCALAR_CONST( + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0xE4437ED6UL, 0x010E8828UL, 0x6F547FA9UL, 0x0ABFE4C3UL + ); + static const secp256k1_scalar_t minus_b2 = SECP256K1_SCALAR_CONST( + 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL, + 0x8A280AC5UL, 0x0774346DUL, 0xD765CDA8UL, 0x3DB1562CUL + ); + static const secp256k1_scalar_t g1 = SECP256K1_SCALAR_CONST( + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00003086UL, + 0xD221A7D4UL, 0x6BCDE86CUL, 0x90E49284UL, 0xEB153DABUL + ); + static const secp256k1_scalar_t g2 = SECP256K1_SCALAR_CONST( + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0000E443UL, + 0x7ED6010EUL, 0x88286F54UL, 0x7FA90ABFUL, 0xE4C42212UL + ); + VERIFY_CHECK(r1 != a); + VERIFY_CHECK(r2 != a); + /* these _var calls are constant time since the shift amount is constant */ + secp256k1_scalar_mul_shift_var(&c1, a, &g1, 272); + secp256k1_scalar_mul_shift_var(&c2, a, &g2, 272); + secp256k1_scalar_mul(&c1, &c1, &minus_b1); + secp256k1_scalar_mul(&c2, &c2, &minus_b2); + secp256k1_scalar_add(r2, &c1, &c2); + secp256k1_scalar_mul(r1, r2, &minus_lambda); + secp256k1_scalar_add(r1, r1, a); +} +#endif + +#endif diff --git a/bf/secp256k1/src/secp256k1.c b/bf/secp256k1/src/secp256k1.c new file mode 100644 index 0000000..4699bff --- /dev/null +++ b/bf/secp256k1/src/secp256k1.c @@ -0,0 +1,569 @@ +/********************************************************************** + * Copyright (c) 2013-2015 Pieter Wuille * + * Distributed under the MIT 
software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#define SECP256K1_BUILD (1) + +#include "include/secp256k1.h" + +#include "util.h" +#include "num_impl.h" +#include "field_impl.h" +#include "scalar_impl.h" +#include "group_impl.h" +#include "ecmult_impl.h" +#include "ecmult_const_impl.h" +#include "ecmult_gen_impl.h" +#include "ecdsa_impl.h" +#include "eckey_impl.h" +#include "hash_impl.h" + +#define ARG_CHECK(cond) do { \ + if (EXPECT(!(cond), 0)) { \ + ctx->illegal_callback.fn(#cond, ctx->illegal_callback.data); \ + return 0; \ + } \ +} while(0) + +static void default_illegal_callback_fn(const char* str, void* data) { + (void)data; + fprintf(stderr, "[libsecp256k1] illegal argument: %s\n", str); + abort(); +} + +static const callback_t default_illegal_callback = { + default_illegal_callback_fn, + NULL +}; + +static void default_error_callback_fn(const char* str, void* data) { + (void)data; + fprintf(stderr, "[libsecp256k1] internal consistency check failed: %s\n", str); + abort(); +} + +static const callback_t default_error_callback = { + default_error_callback_fn, + NULL +}; + + +struct secp256k1_context_struct { + secp256k1_ecmult_context_t ecmult_ctx; + secp256k1_ecmult_gen_context_t ecmult_gen_ctx; + callback_t illegal_callback; + callback_t error_callback; +}; + +secp256k1_context_t* secp256k1_context_create(int flags) { + secp256k1_context_t* ret = (secp256k1_context_t*)checked_malloc(&default_error_callback, sizeof(secp256k1_context_t)); + ret->illegal_callback = default_illegal_callback; + ret->error_callback = default_error_callback; + + secp256k1_ecmult_context_init(&ret->ecmult_ctx); + secp256k1_ecmult_gen_context_init(&ret->ecmult_gen_ctx); + + if (flags & SECP256K1_CONTEXT_SIGN) { + secp256k1_ecmult_gen_context_build(&ret->ecmult_gen_ctx, &ret->error_callback); + } + if (flags & SECP256K1_CONTEXT_VERIFY) { + secp256k1_ecmult_context_build(&ret->ecmult_ctx, &ret->error_callback); + } + + return ret; +} + +secp256k1_context_t* secp256k1_context_clone(const secp256k1_context_t* ctx) { + secp256k1_context_t* ret = (secp256k1_context_t*)checked_malloc(&ctx->error_callback, sizeof(secp256k1_context_t)); + ret->illegal_callback = ctx->illegal_callback; + ret->error_callback = ctx->error_callback; + secp256k1_ecmult_context_clone(&ret->ecmult_ctx, &ctx->ecmult_ctx, &ctx->error_callback); + secp256k1_ecmult_gen_context_clone(&ret->ecmult_gen_ctx, &ctx->ecmult_gen_ctx, &ctx->error_callback); + return ret; +} + +void secp256k1_context_destroy(secp256k1_context_t* ctx) { + secp256k1_ecmult_context_clear(&ctx->ecmult_ctx); + secp256k1_ecmult_gen_context_clear(&ctx->ecmult_gen_ctx); + + free(ctx); +} + +void secp256k1_context_set_illegal_callback(secp256k1_context_t* ctx, void (*fun)(const char* message, void* data), void* data) { + ctx->illegal_callback.fn = fun; + ctx->illegal_callback.data = data; +} + +void secp256k1_context_set_error_callback(secp256k1_context_t* ctx, void (*fun)(const char* message, void* data), void* data) { + ctx->error_callback.fn = fun; + ctx->error_callback.data = data; +} + +static int secp256k1_pubkey_load(const secp256k1_context_t* ctx, secp256k1_ge_t* ge, const secp256k1_pubkey_t* pubkey) { + if (sizeof(secp256k1_ge_storage_t) == 64) { + /* When the secp256k1_ge_storage_t type is exactly 64 byte, use its + * representation inside secp256k1_pubkey_t, as conversion is very fast. 
+ * Note that secp256k1_pubkey_save must use the same representation. */ + secp256k1_ge_storage_t s; + memcpy(&s, &pubkey->data[0], 64); + secp256k1_ge_from_storage(ge, &s); + } else { + /* Otherwise, fall back to 32-byte big endian for X and Y. */ + secp256k1_fe_t x, y; + secp256k1_fe_set_b32(&x, pubkey->data); + secp256k1_fe_set_b32(&y, pubkey->data + 32); + secp256k1_ge_set_xy(ge, &x, &y); + } + ARG_CHECK(!secp256k1_fe_is_zero(&ge->x)); + return 1; +} + +static void secp256k1_pubkey_save(secp256k1_pubkey_t* pubkey, secp256k1_ge_t* ge) { + if (sizeof(secp256k1_ge_storage_t) == 64) { + secp256k1_ge_storage_t s; + secp256k1_ge_to_storage(&s, ge); + memcpy(&pubkey->data[0], &s, 64); + } else { + VERIFY_CHECK(!secp256k1_ge_is_infinity(ge)); + secp256k1_fe_normalize_var(&ge->x); + secp256k1_fe_normalize_var(&ge->y); + secp256k1_fe_get_b32(pubkey->data, &ge->x); + secp256k1_fe_get_b32(pubkey->data + 32, &ge->y); + } +} + +int secp256k1_ec_pubkey_parse(const secp256k1_context_t* ctx, secp256k1_pubkey_t* pubkey, const unsigned char *input, int inputlen) { + secp256k1_ge_t Q; + + (void)ctx; + if (!secp256k1_eckey_pubkey_parse(&Q, input, inputlen)) { + memset(pubkey, 0, sizeof(*pubkey)); + return 0; + } + secp256k1_pubkey_save(pubkey, &Q); + secp256k1_ge_clear(&Q); + return 1; +} + +int secp256k1_ec_pubkey_serialize(const secp256k1_context_t* ctx, unsigned char *output, int *outputlen, const secp256k1_pubkey_t* pubkey, int compressed) { + secp256k1_ge_t Q; + + (void)ctx; + return (secp256k1_pubkey_load(ctx, &Q, pubkey) && + secp256k1_eckey_pubkey_serialize(&Q, output, outputlen, compressed)); +} + +static void secp256k1_ecdsa_signature_load(const secp256k1_context_t* ctx, secp256k1_scalar_t* r, secp256k1_scalar_t* s, int* recid, const secp256k1_ecdsa_signature_t* sig) { + (void)ctx; + if (sizeof(secp256k1_scalar_t) == 32) { + /* When the secp256k1_scalar_t type is exactly 32 byte, use its + * representation inside secp256k1_ecdsa_signature_t, as conversion is very fast. + * Note that secp256k1_ecdsa_signature_save must use the same representation. 
*/ + memcpy(r, &sig->data[0], 32); + memcpy(s, &sig->data[32], 32); + } else { + secp256k1_scalar_set_b32(r, &sig->data[0], NULL); + secp256k1_scalar_set_b32(s, &sig->data[32], NULL); + } + if (recid) { + *recid = sig->data[64]; + } +} + +static void secp256k1_ecdsa_signature_save(secp256k1_ecdsa_signature_t* sig, const secp256k1_scalar_t* r, const secp256k1_scalar_t* s, int recid) { + if (sizeof(secp256k1_scalar_t) == 32) { + memcpy(&sig->data[0], r, 32); + memcpy(&sig->data[32], s, 32); + } else { + secp256k1_scalar_get_b32(&sig->data[0], r); + secp256k1_scalar_get_b32(&sig->data[32], s); + } + sig->data[64] = recid; +} + +int secp256k1_ecdsa_signature_parse_der(const secp256k1_context_t* ctx, secp256k1_ecdsa_signature_t* sig, const unsigned char *input, int inputlen) { + secp256k1_scalar_t r, s; + + (void)ctx; + ARG_CHECK(sig != NULL); + ARG_CHECK(input != NULL); + + if (secp256k1_ecdsa_sig_parse(&r, &s, input, inputlen)) { + secp256k1_ecdsa_signature_save(sig, &r, &s, -1); + return 1; + } else { + memset(sig, 0, sizeof(*sig)); + return 0; + } +} + +int secp256k1_ecdsa_signature_parse_compact(const secp256k1_context_t* ctx, secp256k1_ecdsa_signature_t* sig, const unsigned char *input64, int recid) { + secp256k1_scalar_t r, s; + int ret = 1; + int overflow = 0; + + (void)ctx; + ARG_CHECK(sig != NULL); + ARG_CHECK(input64 != NULL); + + secp256k1_scalar_set_b32(&r, &input64[0], &overflow); + ret &= !overflow; + secp256k1_scalar_set_b32(&s, &input64[32], &overflow); + ret &= !overflow; + ret &= (recid == -1 || (recid >= 0 && recid < 4)); + if (ret) { + secp256k1_ecdsa_signature_save(sig, &r, &s, recid); + } else { + memset(sig, 0, sizeof(*sig)); + } + return ret; +} + +int secp256k1_ecdsa_signature_serialize_der(const secp256k1_context_t* ctx, unsigned char *output, int *outputlen, const secp256k1_ecdsa_signature_t* sig) { + secp256k1_scalar_t r, s; + + (void)ctx; + ARG_CHECK(output != NULL); + ARG_CHECK(outputlen != NULL); + ARG_CHECK(sig != NULL); + + secp256k1_ecdsa_signature_load(ctx, &r, &s, NULL, sig); + return secp256k1_ecdsa_sig_serialize(output, outputlen, &r, &s); +} + +int secp256k1_ecdsa_signature_serialize_compact(const secp256k1_context_t* ctx, unsigned char *output64, int *recid, const secp256k1_ecdsa_signature_t* sig) { + secp256k1_scalar_t r, s; + int rec; + + (void)ctx; + ARG_CHECK(output64 != NULL); + ARG_CHECK(sig != NULL); + + secp256k1_ecdsa_signature_load(ctx, &r, &s, &rec, sig); + secp256k1_scalar_get_b32(&output64[0], &r); + secp256k1_scalar_get_b32(&output64[32], &s); + if (recid) { + ARG_CHECK(rec >= 0 && rec < 4); + *recid = rec; + } + return 1; +} + +int secp256k1_ecdsa_verify(const secp256k1_context_t* ctx, const unsigned char *msg32, const secp256k1_ecdsa_signature_t *sig, const secp256k1_pubkey_t *pubkey) { + secp256k1_ge_t q; + secp256k1_scalar_t r, s; + secp256k1_scalar_t m; + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx)); + ARG_CHECK(msg32 != NULL); + ARG_CHECK(sig != NULL); + ARG_CHECK(pubkey != NULL); + + secp256k1_scalar_set_b32(&m, msg32, NULL); + secp256k1_ecdsa_signature_load(ctx, &r, &s, NULL, sig); + return (secp256k1_pubkey_load(ctx, &q, pubkey) && + secp256k1_ecdsa_sig_verify(&ctx->ecmult_ctx, &r, &s, &q, &m)); +} + +static int nonce_function_rfc6979(unsigned char *nonce32, const unsigned char *msg32, const unsigned char *key32, const unsigned char *algo16, unsigned int counter, const void *data) { + unsigned char keydata[112]; + int keylen = 64; + secp256k1_rfc6979_hmac_sha256_t rng; + unsigned int i; + 
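+ /* keydata is laid out as seckey(32) || msg32(32) || data(32, zero-filled when absent but
+  * algo16 is given) || algo16(16); keylen is 64, 96 or 112 accordingly (see comment below). */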
/* We feed a byte array to the PRNG as input, consisting of: + * - the private key (32 bytes) and message (32 bytes), see RFC 6979 3.2d. + * - optionally 32 extra bytes of data, see RFC 6979 3.6 Additional Data. + * - optionally 16 extra bytes with the algorithm name (the extra data bytes + * are set to zeroes when not present, while the algorithm name is). + */ + memcpy(keydata, key32, 32); + memcpy(keydata + 32, msg32, 32); + if (data != NULL) { + memcpy(keydata + 64, data, 32); + keylen = 96; + } + if (algo16 != NULL) { + memset(keydata + keylen, 0, 96 - keylen); + memcpy(keydata + 96, algo16, 16); + keylen = 112; + } + secp256k1_rfc6979_hmac_sha256_initialize(&rng, keydata, keylen); + memset(keydata, 0, sizeof(keydata)); + for (i = 0; i <= counter; i++) { + secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32); + } + secp256k1_rfc6979_hmac_sha256_finalize(&rng); + return 1; +} + +const secp256k1_nonce_function_t secp256k1_nonce_function_rfc6979 = nonce_function_rfc6979; +const secp256k1_nonce_function_t secp256k1_nonce_function_default = nonce_function_rfc6979; + +int secp256k1_ecdsa_sign(const secp256k1_context_t* ctx, const unsigned char *msg32, secp256k1_ecdsa_signature_t *signature, const unsigned char *seckey, secp256k1_nonce_function_t noncefp, const void* noncedata) { + secp256k1_scalar_t r, s; + secp256k1_scalar_t sec, non, msg; + int recid; + int ret = 0; + int overflow = 0; + unsigned int count = 0; + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)); + ARG_CHECK(msg32 != NULL); + ARG_CHECK(signature != NULL); + ARG_CHECK(seckey != NULL); + if (noncefp == NULL) { + noncefp = secp256k1_nonce_function_default; + } + + secp256k1_scalar_set_b32(&sec, seckey, &overflow); + /* Fail if the secret key is invalid. 
*/ + if (!overflow && !secp256k1_scalar_is_zero(&sec)) { + secp256k1_scalar_set_b32(&msg, msg32, NULL); + while (1) { + unsigned char nonce32[32]; + ret = noncefp(nonce32, msg32, seckey, NULL, count, noncedata); + if (!ret) { + break; + } + secp256k1_scalar_set_b32(&non, nonce32, &overflow); + memset(nonce32, 0, 32); + if (!secp256k1_scalar_is_zero(&non) && !overflow) { + if (secp256k1_ecdsa_sig_sign(&ctx->ecmult_gen_ctx, &r, &s, &sec, &msg, &non, &recid)) { + break; + } + } + count++; + } + secp256k1_scalar_clear(&msg); + secp256k1_scalar_clear(&non); + secp256k1_scalar_clear(&sec); + } + if (ret) { + secp256k1_ecdsa_signature_save(signature, &r, &s, recid); + } else { + memset(signature, 0, sizeof(*signature)); + } + return ret; +} + +int secp256k1_ecdsa_recover(const secp256k1_context_t* ctx, const unsigned char *msg32, const secp256k1_ecdsa_signature_t *signature, secp256k1_pubkey_t *pubkey) { + secp256k1_ge_t q; + secp256k1_scalar_t r, s; + secp256k1_scalar_t m; + int recid; + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx)); + ARG_CHECK(msg32 != NULL); + ARG_CHECK(signature != NULL); + ARG_CHECK(pubkey != NULL); + + secp256k1_ecdsa_signature_load(ctx, &r, &s, &recid, signature); + ARG_CHECK(recid >= 0 && recid < 4); + secp256k1_scalar_set_b32(&m, msg32, NULL); + if (secp256k1_ecdsa_sig_recover(&ctx->ecmult_ctx, &r, &s, &q, &m, recid)) { + secp256k1_pubkey_save(pubkey, &q); + return 1; + } else { + memset(pubkey, 0, sizeof(*pubkey)); + return 0; + } +} + +int secp256k1_ec_seckey_verify(const secp256k1_context_t* ctx, const unsigned char *seckey) { + secp256k1_scalar_t sec; + int ret; + int overflow; + ARG_CHECK(ctx != NULL); + ARG_CHECK(seckey != NULL); + (void)ctx; + + secp256k1_scalar_set_b32(&sec, seckey, &overflow); + ret = !secp256k1_scalar_is_zero(&sec) && !overflow; + secp256k1_scalar_clear(&sec); + return ret; +} + +int secp256k1_ec_pubkey_create(const secp256k1_context_t* ctx, secp256k1_pubkey_t *pubkey, const unsigned char *seckey) { + secp256k1_gej_t pj; + secp256k1_ge_t p; + secp256k1_scalar_t sec; + int overflow; + int ret = 0; + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)); + ARG_CHECK(pubkey != NULL); + ARG_CHECK(seckey != NULL); + + secp256k1_scalar_set_b32(&sec, seckey, &overflow); + ret = !overflow & !secp256k1_scalar_is_zero(&sec); + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &pj, &sec); + secp256k1_ge_set_gej(&p, &pj); + secp256k1_pubkey_save(pubkey, &p); + secp256k1_scalar_clear(&sec); + if (!ret) { + memset(pubkey, 0, sizeof(*pubkey)); + } + return ret; +} + +int secp256k1_ec_privkey_tweak_add(const secp256k1_context_t* ctx, unsigned char *seckey, const unsigned char *tweak) { + secp256k1_scalar_t term; + secp256k1_scalar_t sec; + int ret = 0; + int overflow = 0; + ARG_CHECK(ctx != NULL); + ARG_CHECK(seckey != NULL); + ARG_CHECK(tweak != NULL); + (void)ctx; + + secp256k1_scalar_set_b32(&term, tweak, &overflow); + secp256k1_scalar_set_b32(&sec, seckey, NULL); + + ret = secp256k1_eckey_privkey_tweak_add(&sec, &term) && !overflow; + if (ret) { + secp256k1_scalar_get_b32(seckey, &sec); + } + + secp256k1_scalar_clear(&sec); + secp256k1_scalar_clear(&term); + return ret; +} + +int secp256k1_ec_pubkey_tweak_add(const secp256k1_context_t* ctx, secp256k1_pubkey_t *pubkey, const unsigned char *tweak) { + secp256k1_ge_t p; + secp256k1_scalar_t term; + int ret = 0; + int overflow = 0; + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx)); + 
ARG_CHECK(pubkey != NULL); + ARG_CHECK(tweak != NULL); + + secp256k1_scalar_set_b32(&term, tweak, &overflow); + if (!overflow && secp256k1_pubkey_load(ctx, &p, pubkey)) { + ret = secp256k1_eckey_pubkey_tweak_add(&ctx->ecmult_ctx, &p, &term); + if (ret) { + secp256k1_pubkey_save(pubkey, &p); + } else { + memset(pubkey, 0, sizeof(*pubkey)); + } + } + + return ret; +} + +int secp256k1_ec_privkey_tweak_mul(const secp256k1_context_t* ctx, unsigned char *seckey, const unsigned char *tweak) { + secp256k1_scalar_t factor; + secp256k1_scalar_t sec; + int ret = 0; + int overflow = 0; + ARG_CHECK(ctx != NULL); + ARG_CHECK(seckey != NULL); + ARG_CHECK(tweak != NULL); + (void)ctx; + + secp256k1_scalar_set_b32(&factor, tweak, &overflow); + secp256k1_scalar_set_b32(&sec, seckey, NULL); + ret = secp256k1_eckey_privkey_tweak_mul(&sec, &factor) && !overflow; + if (ret) { + secp256k1_scalar_get_b32(seckey, &sec); + } + + secp256k1_scalar_clear(&sec); + secp256k1_scalar_clear(&factor); + return ret; +} + +int secp256k1_ec_pubkey_tweak_mul(const secp256k1_context_t* ctx, secp256k1_pubkey_t *pubkey, const unsigned char *tweak) { + secp256k1_ge_t p; + secp256k1_scalar_t factor; + int ret = 0; + int overflow = 0; + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_context_is_built(&ctx->ecmult_ctx)); + ARG_CHECK(pubkey != NULL); + ARG_CHECK(tweak != NULL); + + secp256k1_scalar_set_b32(&factor, tweak, &overflow); + if (!overflow && secp256k1_pubkey_load(ctx, &p, pubkey)) { + ret = secp256k1_eckey_pubkey_tweak_mul(&ctx->ecmult_ctx, &p, &factor); + if (ret) { + secp256k1_pubkey_save(pubkey, &p); + } else { + memset(pubkey, 0, sizeof(*pubkey)); + } + } + + return ret; +} + +int secp256k1_ec_privkey_export(const secp256k1_context_t* ctx, const unsigned char *seckey, unsigned char *privkey, int *privkeylen, int compressed) { + secp256k1_scalar_t key; + int ret = 0; + ARG_CHECK(seckey != NULL); + ARG_CHECK(privkey != NULL); + ARG_CHECK(privkeylen != NULL); + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)); + + secp256k1_scalar_set_b32(&key, seckey, NULL); + ret = secp256k1_eckey_privkey_serialize(&ctx->ecmult_gen_ctx, privkey, privkeylen, &key, compressed); + secp256k1_scalar_clear(&key); + return ret; +} + +int secp256k1_ec_privkey_import(const secp256k1_context_t* ctx, unsigned char *seckey, const unsigned char *privkey, int privkeylen) { + secp256k1_scalar_t key; + int ret = 0; + ARG_CHECK(seckey != NULL); + ARG_CHECK(privkey != NULL); + (void)ctx; + + ret = secp256k1_eckey_privkey_parse(&key, privkey, privkeylen); + if (ret) { + secp256k1_scalar_get_b32(seckey, &key); + } + secp256k1_scalar_clear(&key); + return ret; +} + +int secp256k1_context_randomize(secp256k1_context_t* ctx, const unsigned char *seed32) { + ARG_CHECK(ctx != NULL); + ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)); + secp256k1_ecmult_gen_blind(&ctx->ecmult_gen_ctx, seed32); + return 1; +} + +int secp256k1_ec_pubkey_combine(const secp256k1_context_t* ctx, secp256k1_pubkey_t *pubnonce, int n, const secp256k1_pubkey_t * const *pubnonces) { + int i; + secp256k1_gej_t Qj; + secp256k1_ge_t Q; + + ARG_CHECK(pubnonce != NULL); + ARG_CHECK(n >= 1); + ARG_CHECK(pubnonces != NULL); + + secp256k1_gej_set_infinity(&Qj); + + for (i = 0; i < n; i++) { + secp256k1_pubkey_load(ctx, &Q, pubnonces[i]); + secp256k1_gej_add_ge(&Qj, &Qj, &Q); + } + if (secp256k1_gej_is_infinity(&Qj)) { + memset(pubnonce, 0, sizeof(*pubnonce)); + return 0; + } + secp256k1_ge_set_gej(&Q, &Qj); + 
secp256k1_pubkey_save(pubnonce, &Q); + return 1; +} + +#ifdef ENABLE_MODULE_ECDH +# include "modules/ecdh/main_impl.h" +#endif + +#ifdef ENABLE_MODULE_SCHNORR +# include "modules/schnorr/main_impl.h" +#endif diff --git a/bf/secp256k1/src/testrand.h b/bf/secp256k1/src/testrand.h new file mode 100644 index 0000000..041bb92 --- /dev/null +++ b/bf/secp256k1/src/testrand.h @@ -0,0 +1,28 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_TESTRAND_H_ +#define _SECP256K1_TESTRAND_H_ + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +/* A non-cryptographic RNG used only for test infrastructure. */ + +/** Seed the pseudorandom number generator for testing. */ +SECP256K1_INLINE static void secp256k1_rand_seed(const unsigned char *seed16); + +/** Generate a pseudorandom 32-bit number. */ +static uint32_t secp256k1_rand32(void); + +/** Generate a pseudorandom 32-byte array. */ +static void secp256k1_rand256(unsigned char *b32); + +/** Generate a pseudorandom 32-byte array with long sequences of zero and one bits. */ +static void secp256k1_rand256_test(unsigned char *b32); + +#endif diff --git a/bf/secp256k1/src/testrand_impl.h b/bf/secp256k1/src/testrand_impl.h new file mode 100644 index 0000000..7c35542 --- /dev/null +++ b/bf/secp256k1/src/testrand_impl.h @@ -0,0 +1,60 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_TESTRAND_IMPL_H_ +#define _SECP256K1_TESTRAND_IMPL_H_ + +#include <stdint.h> +#include <string.h> + +#include "testrand.h" +#include "hash.h" + +static secp256k1_rfc6979_hmac_sha256_t secp256k1_test_rng; +static uint32_t secp256k1_test_rng_precomputed[8]; +static int secp256k1_test_rng_precomputed_used = 8; + +SECP256K1_INLINE static void secp256k1_rand_seed(const unsigned char *seed16) { + secp256k1_rfc6979_hmac_sha256_initialize(&secp256k1_test_rng, seed16, 16); +} + +SECP256K1_INLINE static uint32_t secp256k1_rand32(void) { + if (secp256k1_test_rng_precomputed_used == 8) { + secp256k1_rfc6979_hmac_sha256_generate(&secp256k1_test_rng, (unsigned char*)(&secp256k1_test_rng_precomputed[0]), sizeof(secp256k1_test_rng_precomputed)); + secp256k1_test_rng_precomputed_used = 0; + } + return secp256k1_test_rng_precomputed[secp256k1_test_rng_precomputed_used++]; +} + +static void secp256k1_rand256(unsigned char *b32) { + secp256k1_rfc6979_hmac_sha256_generate(&secp256k1_test_rng, b32, 32); +} + +static void secp256k1_rand256_test(unsigned char *b32) { + int bits=0; + uint64_t ent = 0; + int entleft = 0; + memset(b32, 0, 32); + while (bits < 256) { + int now; + uint32_t val; + if (entleft < 12) { + ent |= ((uint64_t)secp256k1_rand32()) << entleft; + entleft += 32; + } + now = 1 + ((ent % 64)*((ent >> 6) % 32)+16)/31; + val = 1 & (ent >> 11); + ent >>= 12; + entleft -= 12; + while (now > 0 && bits < 256) { + b32[bits / 8] |= val << (bits % 8); + now--; + bits++; + } + } +} + +#endif diff --git a/bf/secp256k1/src/tests.c b/bf/secp256k1/src/tests.c new file mode 100644 index
0000000..ca055c4 --- /dev/null +++ b/bf/secp256k1/src/tests.c @@ -0,0 +1,2532 @@ +/********************************************************************** + * Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> + +#include <time.h> + +#include "include/secp256k1.h" +#include "secp256k1.c" +#include "testrand_impl.h" + +#ifdef ENABLE_OPENSSL_TESTS +#include "openssl/bn.h" +#include "openssl/ec.h" +#include "openssl/ecdsa.h" +#include "openssl/obj_mac.h" +#endif + +static int count = 64; +static secp256k1_context_t *ctx = NULL; + +void random_field_element_test(secp256k1_fe_t *fe) { + do { + unsigned char b32[32]; + secp256k1_rand256_test(b32); + if (secp256k1_fe_set_b32(fe, b32)) { + break; + } + } while(1); +} + +void random_field_element_magnitude(secp256k1_fe_t *fe) { + secp256k1_fe_t zero; + int n = secp256k1_rand32() % 9; + secp256k1_fe_normalize(fe); + if (n == 0) { + return; + } + secp256k1_fe_clear(&zero); + secp256k1_fe_negate(&zero, &zero, 0); + secp256k1_fe_mul_int(&zero, n - 1); + secp256k1_fe_add(fe, &zero); + VERIFY_CHECK(fe->magnitude == n); +} + +void random_group_element_test(secp256k1_ge_t *ge) { + secp256k1_fe_t fe; + do { + random_field_element_test(&fe); + if (secp256k1_ge_set_xo_var(ge, &fe, secp256k1_rand32() & 1)) { + secp256k1_fe_normalize(&ge->y); + break; + } + } while(1); +} + +void random_group_element_jacobian_test(secp256k1_gej_t *gej, const secp256k1_ge_t *ge) { + secp256k1_fe_t z2, z3; + do { + random_field_element_test(&gej->z); + if (!secp256k1_fe_is_zero(&gej->z)) { + break; + } + } while(1); + secp256k1_fe_sqr(&z2, &gej->z); + secp256k1_fe_mul(&z3, &z2, &gej->z); + secp256k1_fe_mul(&gej->x, &ge->x, &z2); + secp256k1_fe_mul(&gej->y, &ge->y, &z3); + gej->infinity = ge->infinity; +} + +void random_scalar_order_test(secp256k1_scalar_t *num) { + do { + unsigned char b32[32]; + int overflow = 0; + secp256k1_rand256_test(b32); + secp256k1_scalar_set_b32(num, b32, &overflow); + if (overflow || secp256k1_scalar_is_zero(num)) { + continue; + } + break; + } while(1); +} + +void random_scalar_order(secp256k1_scalar_t *num) { + do { + unsigned char b32[32]; + int overflow = 0; + secp256k1_rand256(b32); + secp256k1_scalar_set_b32(num, b32, &overflow); + if (overflow || secp256k1_scalar_is_zero(num)) { + continue; + } + break; + } while(1); +} + +void run_context_tests(void) { + secp256k1_context_t *none = secp256k1_context_create(0); + secp256k1_context_t *sign = secp256k1_context_create(SECP256K1_CONTEXT_SIGN); + secp256k1_context_t *vrfy = secp256k1_context_create(SECP256K1_CONTEXT_VERIFY); + secp256k1_context_t *both = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); + + secp256k1_gej_t pubj; + secp256k1_ge_t pub; + secp256k1_scalar_t msg, key, nonce; + secp256k1_scalar_t sigr, sigs; + + /*** clone and destroy all of them to make sure cloning was complete ***/ + { + secp256k1_context_t *ctx_tmp; + + ctx_tmp = none; none = secp256k1_context_clone(none); secp256k1_context_destroy(ctx_tmp); + ctx_tmp = sign; sign = secp256k1_context_clone(sign); secp256k1_context_destroy(ctx_tmp); + ctx_tmp = vrfy; vrfy = secp256k1_context_clone(vrfy); secp256k1_context_destroy(ctx_tmp); + ctx_tmp = both; both = secp256k1_context_clone(both);
secp256k1_context_destroy(ctx_tmp); + } + + /*** attempt to use them ***/ + random_scalar_order_test(&msg); + random_scalar_order_test(&key); + secp256k1_ecmult_gen(&both->ecmult_gen_ctx, &pubj, &key); + secp256k1_ge_set_gej(&pub, &pubj); + + /* obtain a working nonce */ + do { + random_scalar_order_test(&nonce); + } while(!secp256k1_ecdsa_sig_sign(&both->ecmult_gen_ctx, &sigr, &sigs, &key, &msg, &nonce, NULL)); + + /* try signing */ + CHECK(secp256k1_ecdsa_sig_sign(&sign->ecmult_gen_ctx, &sigr, &sigs, &key, &msg, &nonce, NULL)); + CHECK(secp256k1_ecdsa_sig_sign(&both->ecmult_gen_ctx, &sigr, &sigs, &key, &msg, &nonce, NULL)); + + /* try verifying */ + CHECK(secp256k1_ecdsa_sig_verify(&vrfy->ecmult_ctx, &sigr, &sigs, &pub, &msg)); + CHECK(secp256k1_ecdsa_sig_verify(&both->ecmult_ctx, &sigr, &sigs, &pub, &msg)); + + /* cleanup */ + secp256k1_context_destroy(none); + secp256k1_context_destroy(sign); + secp256k1_context_destroy(vrfy); + secp256k1_context_destroy(both); +} + +/***** HASH TESTS *****/ + +void run_sha256_tests(void) { + static const char *inputs[8] = { + "", "abc", "message digest", "secure hash algorithm", "SHA256 is considered to be safe", + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + "For this sample, this 63-byte string will be used as input data", + "This is exactly 64 bytes long, not counting the terminating byte" + }; + static const unsigned char outputs[8][32] = { + {0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}, + {0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad}, + {0xf7, 0x84, 0x6f, 0x55, 0xcf, 0x23, 0xe1, 0x4e, 0xeb, 0xea, 0xb5, 0xb4, 0xe1, 0x55, 0x0c, 0xad, 0x5b, 0x50, 0x9e, 0x33, 0x48, 0xfb, 0xc4, 0xef, 0xa3, 0xa1, 0x41, 0x3d, 0x39, 0x3c, 0xb6, 0x50}, + {0xf3, 0x0c, 0xeb, 0x2b, 0xb2, 0x82, 0x9e, 0x79, 0xe4, 0xca, 0x97, 0x53, 0xd3, 0x5a, 0x8e, 0xcc, 0x00, 0x26, 0x2d, 0x16, 0x4c, 0xc0, 0x77, 0x08, 0x02, 0x95, 0x38, 0x1c, 0xbd, 0x64, 0x3f, 0x0d}, + {0x68, 0x19, 0xd9, 0x15, 0xc7, 0x3f, 0x4d, 0x1e, 0x77, 0xe4, 0xe1, 0xb5, 0x2d, 0x1f, 0xa0, 0xf9, 0xcf, 0x9b, 0xea, 0xea, 0xd3, 0x93, 0x9f, 0x15, 0x87, 0x4b, 0xd9, 0x88, 0xe2, 0xa2, 0x36, 0x30}, + {0x24, 0x8d, 0x6a, 0x61, 0xd2, 0x06, 0x38, 0xb8, 0xe5, 0xc0, 0x26, 0x93, 0x0c, 0x3e, 0x60, 0x39, 0xa3, 0x3c, 0xe4, 0x59, 0x64, 0xff, 0x21, 0x67, 0xf6, 0xec, 0xed, 0xd4, 0x19, 0xdb, 0x06, 0xc1}, + {0xf0, 0x8a, 0x78, 0xcb, 0xba, 0xee, 0x08, 0x2b, 0x05, 0x2a, 0xe0, 0x70, 0x8f, 0x32, 0xfa, 0x1e, 0x50, 0xc5, 0xc4, 0x21, 0xaa, 0x77, 0x2b, 0xa5, 0xdb, 0xb4, 0x06, 0xa2, 0xea, 0x6b, 0xe3, 0x42}, + {0xab, 0x64, 0xef, 0xf7, 0xe8, 0x8e, 0x2e, 0x46, 0x16, 0x5e, 0x29, 0xf2, 0xbc, 0xe4, 0x18, 0x26, 0xbd, 0x4c, 0x7b, 0x35, 0x52, 0xf6, 0xb3, 0x82, 0xa9, 0xe7, 0xd3, 0xaf, 0x47, 0xc2, 0x45, 0xf8} + }; + int i; + for (i = 0; i < 8; i++) { + unsigned char out[32]; + secp256k1_sha256_t hasher; + secp256k1_sha256_initialize(&hasher); + secp256k1_sha256_write(&hasher, (const unsigned char*)(inputs[i]), strlen(inputs[i])); + secp256k1_sha256_finalize(&hasher, out); + CHECK(memcmp(out, outputs[i], 32) == 0); + if (strlen(inputs[i]) > 0) { + int split = secp256k1_rand32() % strlen(inputs[i]); + secp256k1_sha256_initialize(&hasher); + secp256k1_sha256_write(&hasher, (const unsigned char*)(inputs[i]), split); + secp256k1_sha256_write(&hasher, (const unsigned 
char*)(inputs[i] + split), strlen(inputs[i]) - split); + secp256k1_sha256_finalize(&hasher, out); + CHECK(memcmp(out, outputs[i], 32) == 0); + } + } +} + +void run_hmac_sha256_tests(void) { + static const char *keys[6] = { + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", + "\x4a\x65\x66\x65", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + }; + static const char *inputs[6] = { + "\x48\x69\x20\x54\x68\x65\x72\x65", + "\x77\x68\x61\x74\x20\x64\x6f\x20\x79\x61\x20\x77\x61\x6e\x74\x20\x66\x6f\x72\x20\x6e\x6f\x74\x68\x69\x6e\x67\x3f", + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd", + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd", + "\x54\x65\x73\x74\x20\x55\x73\x69\x6e\x67\x20\x4c\x61\x72\x67\x65\x72\x20\x54\x68\x61\x6e\x20\x42\x6c\x6f\x63\x6b\x2d\x53\x69\x7a\x65\x20\x4b\x65\x79\x20\x2d\x20\x48\x61\x73\x68\x20\x4b\x65\x79\x20\x46\x69\x72\x73\x74", + "\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x75\x73\x69\x6e\x67\x20\x61\x20\x6c\x61\x72\x67\x65\x72\x20\x74\x68\x61\x6e\x20\x62\x6c\x6f\x63\x6b\x2d\x73\x69\x7a\x65\x20\x6b\x65\x79\x20\x61\x6e\x64\x20\x61\x20\x6c\x61\x72\x67\x65\x72\x20\x74\x68\x61\x6e\x20\x62\x6c\x6f\x63\x6b\x2d\x73\x69\x7a\x65\x20\x64\x61\x74\x61\x2e\x20\x54\x68\x65\x20\x6b\x65\x79\x20\x6e\x65\x65\x64\x73\x20\x74\x6f\x20\x62\x65\x20\x68\x61\x73\x68\x65\x64\x20\x62\x65\x66\x6f\x72\x65\x20\x62\x65\x69\x6e\x67\x20\x75\x73\x65\x64\x20\x62\x79\x20\x74\x68\x65\x20\x48\x4d\x41\x43\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x2e" + }; + static const unsigned char outputs[6][32] = { + {0xb0, 0x34, 0x4c, 0x61, 0xd8, 0xdb, 0x38, 0x53, 0x5c, 0xa8, 0xaf, 0xce, 0xaf, 0x0b, 0xf1, 0x2b, 0x88, 0x1d, 0xc2, 0x00, 0xc9, 0x83, 0x3d, 0xa7, 0x26, 0xe9, 0x37, 0x6c, 0x2e, 0x32, 0xcf, 0xf7}, + {0x5b, 0xdc, 0xc1, 0x46, 0xbf, 0x60, 0x75, 0x4e, 0x6a, 0x04, 0x24, 0x26, 0x08, 0x95, 0x75, 0xc7, 0x5a, 0x00, 0x3f, 0x08, 0x9d, 0x27, 0x39, 0x83, 0x9d, 0xec, 0x58, 0xb9, 0x64, 0xec, 0x38, 0x43}, + {0x77, 0x3e, 0xa9, 0x1e, 0x36, 0x80, 0x0e, 0x46, 0x85, 0x4d, 0xb8, 0xeb, 0xd0, 
0x91, 0x81, 0xa7, 0x29, 0x59, 0x09, 0x8b, 0x3e, 0xf8, 0xc1, 0x22, 0xd9, 0x63, 0x55, 0x14, 0xce, 0xd5, 0x65, 0xfe}, + {0x82, 0x55, 0x8a, 0x38, 0x9a, 0x44, 0x3c, 0x0e, 0xa4, 0xcc, 0x81, 0x98, 0x99, 0xf2, 0x08, 0x3a, 0x85, 0xf0, 0xfa, 0xa3, 0xe5, 0x78, 0xf8, 0x07, 0x7a, 0x2e, 0x3f, 0xf4, 0x67, 0x29, 0x66, 0x5b}, + {0x60, 0xe4, 0x31, 0x59, 0x1e, 0xe0, 0xb6, 0x7f, 0x0d, 0x8a, 0x26, 0xaa, 0xcb, 0xf5, 0xb7, 0x7f, 0x8e, 0x0b, 0xc6, 0x21, 0x37, 0x28, 0xc5, 0x14, 0x05, 0x46, 0x04, 0x0f, 0x0e, 0xe3, 0x7f, 0x54}, + {0x9b, 0x09, 0xff, 0xa7, 0x1b, 0x94, 0x2f, 0xcb, 0x27, 0x63, 0x5f, 0xbc, 0xd5, 0xb0, 0xe9, 0x44, 0xbf, 0xdc, 0x63, 0x64, 0x4f, 0x07, 0x13, 0x93, 0x8a, 0x7f, 0x51, 0x53, 0x5c, 0x3a, 0x35, 0xe2} + }; + int i; + for (i = 0; i < 6; i++) { + secp256k1_hmac_sha256_t hasher; + unsigned char out[32]; + secp256k1_hmac_sha256_initialize(&hasher, (const unsigned char*)(keys[i]), strlen(keys[i])); + secp256k1_hmac_sha256_write(&hasher, (const unsigned char*)(inputs[i]), strlen(inputs[i])); + secp256k1_hmac_sha256_finalize(&hasher, out); + CHECK(memcmp(out, outputs[i], 32) == 0); + if (strlen(inputs[i]) > 0) { + int split = secp256k1_rand32() % strlen(inputs[i]); + secp256k1_hmac_sha256_initialize(&hasher, (const unsigned char*)(keys[i]), strlen(keys[i])); + secp256k1_hmac_sha256_write(&hasher, (const unsigned char*)(inputs[i]), split); + secp256k1_hmac_sha256_write(&hasher, (const unsigned char*)(inputs[i] + split), strlen(inputs[i]) - split); + secp256k1_hmac_sha256_finalize(&hasher, out); + CHECK(memcmp(out, outputs[i], 32) == 0); + } + } +} + +void run_rfc6979_hmac_sha256_tests(void) { + static const unsigned char key1[65] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x00, 0x4b, 0xf5, 0x12, 0x2f, 0x34, 0x45, 0x54, 0xc5, 0x3b, 0xde, 0x2e, 0xbb, 0x8c, 0xd2, 0xb7, 0xe3, 0xd1, 0x60, 0x0a, 0xd6, 0x31, 0xc3, 0x85, 0xa5, 0xd7, 0xcc, 0xe2, 0x3c, 0x77, 0x85, 0x45, 0x9a, 0}; + static const unsigned char out1[3][32] = { + {0x4f, 0xe2, 0x95, 0x25, 0xb2, 0x08, 0x68, 0x09, 0x15, 0x9a, 0xcd, 0xf0, 0x50, 0x6e, 0xfb, 0x86, 0xb0, 0xec, 0x93, 0x2c, 0x7b, 0xa4, 0x42, 0x56, 0xab, 0x32, 0x1e, 0x42, 0x1e, 0x67, 0xe9, 0xfb}, + {0x2b, 0xf0, 0xff, 0xf1, 0xd3, 0xc3, 0x78, 0xa2, 0x2d, 0xc5, 0xde, 0x1d, 0x85, 0x65, 0x22, 0x32, 0x5c, 0x65, 0xb5, 0x04, 0x49, 0x1a, 0x0c, 0xbd, 0x01, 0xcb, 0x8f, 0x3a, 0xa6, 0x7f, 0xfd, 0x4a}, + {0xf5, 0x28, 0xb4, 0x10, 0xcb, 0x54, 0x1f, 0x77, 0x00, 0x0d, 0x7a, 0xfb, 0x6c, 0x5b, 0x53, 0xc5, 0xc4, 0x71, 0xea, 0xb4, 0x3e, 0x46, 0x6d, 0x9a, 0xc5, 0x19, 0x0c, 0x39, 0xc8, 0x2f, 0xd8, 0x2e} + }; + + static const unsigned char key2[64] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}; + static const unsigned char out2[3][32] = { + {0x9c, 0x23, 0x6c, 0x16, 0x5b, 0x82, 0xae, 0x0c, 0xd5, 0x90, 0x65, 0x9e, 0x10, 0x0b, 0x6b, 0xab, 0x30, 0x36, 0xe7, 0xba, 0x8b, 0x06, 0x74, 0x9b, 0xaf, 0x69, 0x81, 0xe1, 0x6f, 0x1a, 0x2b, 0x95}, + {0xdf, 0x47, 0x10, 0x61, 0x62, 0x5b, 0xc0, 0xea, 0x14, 0xb6, 0x82, 0xfe, 0xee, 0x2c, 0x9c, 0x02, 0xf2, 0x35, 0xda, 0x04, 0x20, 0x4c, 0x1d, 0x62, 0xa1, 0x53, 0x6c, 0x6e, 0x17, 0xae, 0xd7, 0xa9}, + {0x75, 0x97, 0x88, 
0x7c, 0xbd, 0x76, 0x32, 0x1f, 0x32, 0xe3, 0x04, 0x40, 0x67, 0x9a, 0x22, 0xcf, 0x7f, 0x8d, 0x9d, 0x2e, 0xac, 0x39, 0x0e, 0x58, 0x1f, 0xea, 0x09, 0x1c, 0xe2, 0x02, 0xba, 0x94} + }; + + secp256k1_rfc6979_hmac_sha256_t rng; + unsigned char out[32]; + int i; + + secp256k1_rfc6979_hmac_sha256_initialize(&rng, key1, 64); + for (i = 0; i < 3; i++) { + secp256k1_rfc6979_hmac_sha256_generate(&rng, out, 32); + CHECK(memcmp(out, out1[i], 32) == 0); + } + secp256k1_rfc6979_hmac_sha256_finalize(&rng); + + secp256k1_rfc6979_hmac_sha256_initialize(&rng, key1, 65); + for (i = 0; i < 3; i++) { + secp256k1_rfc6979_hmac_sha256_generate(&rng, out, 32); + CHECK(memcmp(out, out1[i], 32) != 0); + } + secp256k1_rfc6979_hmac_sha256_finalize(&rng); + + secp256k1_rfc6979_hmac_sha256_initialize(&rng, key2, 64); + for (i = 0; i < 3; i++) { + secp256k1_rfc6979_hmac_sha256_generate(&rng, out, 32); + CHECK(memcmp(out, out2[i], 32) == 0); + } + secp256k1_rfc6979_hmac_sha256_finalize(&rng); +} + +/***** NUM TESTS *****/ + +#ifndef USE_NUM_NONE +void random_num_negate(secp256k1_num_t *num) { + if (secp256k1_rand32() & 1) { + secp256k1_num_negate(num); + } +} + +void random_num_order_test(secp256k1_num_t *num) { + secp256k1_scalar_t sc; + random_scalar_order_test(&sc); + secp256k1_scalar_get_num(num, &sc); +} + +void random_num_order(secp256k1_num_t *num) { + secp256k1_scalar_t sc; + random_scalar_order(&sc); + secp256k1_scalar_get_num(num, &sc); +} + +void test_num_negate(void) { + secp256k1_num_t n1; + secp256k1_num_t n2; + random_num_order_test(&n1); /* n1 = R */ + random_num_negate(&n1); + secp256k1_num_copy(&n2, &n1); /* n2 = R */ + secp256k1_num_sub(&n1, &n2, &n1); /* n1 = n2-n1 = 0 */ + CHECK(secp256k1_num_is_zero(&n1)); + secp256k1_num_copy(&n1, &n2); /* n1 = R */ + secp256k1_num_negate(&n1); /* n1 = -R */ + CHECK(!secp256k1_num_is_zero(&n1)); + secp256k1_num_add(&n1, &n2, &n1); /* n1 = n2+n1 = 0 */ + CHECK(secp256k1_num_is_zero(&n1)); + secp256k1_num_copy(&n1, &n2); /* n1 = R */ + secp256k1_num_negate(&n1); /* n1 = -R */ + CHECK(secp256k1_num_is_neg(&n1) != secp256k1_num_is_neg(&n2)); + secp256k1_num_negate(&n1); /* n1 = R */ + CHECK(secp256k1_num_eq(&n1, &n2)); +} + +void test_num_add_sub(void) { + secp256k1_num_t n1; + secp256k1_num_t n2; + secp256k1_num_t n1p2, n2p1, n1m2, n2m1; + int r = secp256k1_rand32(); + random_num_order_test(&n1); /* n1 = R1 */ + if (r & 1) { + random_num_negate(&n1); + } + random_num_order_test(&n2); /* n2 = R2 */ + if (r & 2) { + random_num_negate(&n2); + } + secp256k1_num_add(&n1p2, &n1, &n2); /* n1p2 = R1 + R2 */ + secp256k1_num_add(&n2p1, &n2, &n1); /* n2p1 = R2 + R1 */ + secp256k1_num_sub(&n1m2, &n1, &n2); /* n1m2 = R1 - R2 */ + secp256k1_num_sub(&n2m1, &n2, &n1); /* n2m1 = R2 - R1 */ + CHECK(secp256k1_num_eq(&n1p2, &n2p1)); + CHECK(!secp256k1_num_eq(&n1p2, &n1m2)); + secp256k1_num_negate(&n2m1); /* n2m1 = -R2 + R1 */ + CHECK(secp256k1_num_eq(&n2m1, &n1m2)); + CHECK(!secp256k1_num_eq(&n2m1, &n1)); + secp256k1_num_add(&n2m1, &n2m1, &n2); /* n2m1 = -R2 + R1 + R2 = R1 */ + CHECK(secp256k1_num_eq(&n2m1, &n1)); + CHECK(!secp256k1_num_eq(&n2p1, &n1)); + secp256k1_num_sub(&n2p1, &n2p1, &n2); /* n2p1 = R2 + R1 - R2 = R1 */ + CHECK(secp256k1_num_eq(&n2p1, &n1)); +} + +void run_num_smalltests(void) { + int i; + for (i = 0; i < 100*count; i++) { + test_num_negate(); + test_num_add_sub(); + } +} +#endif + +/***** SCALAR TESTS *****/ + +void scalar_test(void) { + secp256k1_scalar_t s; + secp256k1_scalar_t s1; + secp256k1_scalar_t s2; +#ifndef USE_NUM_NONE + secp256k1_num_t snum, s1num, s2num; + 
secp256k1_num_t order, half_order; +#endif + unsigned char c[32]; + + /* Set 's' to a random scalar, with value 'snum'. */ + random_scalar_order_test(&s); + + /* Set 's1' to a random scalar, with value 's1num'. */ + random_scalar_order_test(&s1); + + /* Set 's2' to a random scalar, with value 'snum2', and byte array representation 'c'. */ + random_scalar_order_test(&s2); + secp256k1_scalar_get_b32(c, &s2); + +#ifndef USE_NUM_NONE + secp256k1_scalar_get_num(&snum, &s); + secp256k1_scalar_get_num(&s1num, &s1); + secp256k1_scalar_get_num(&s2num, &s2); + + secp256k1_scalar_order_get_num(&order); + half_order = order; + secp256k1_num_shift(&half_order, 1); +#endif + + { + int i; + /* Test that fetching groups of 4 bits from a scalar and recursing n(i)=16*n(i-1)+p(i) reconstructs it. */ + secp256k1_scalar_t n; + secp256k1_scalar_set_int(&n, 0); + for (i = 0; i < 256; i += 4) { + secp256k1_scalar_t t; + int j; + secp256k1_scalar_set_int(&t, secp256k1_scalar_get_bits(&s, 256 - 4 - i, 4)); + for (j = 0; j < 4; j++) { + secp256k1_scalar_add(&n, &n, &n); + } + secp256k1_scalar_add(&n, &n, &t); + } + CHECK(secp256k1_scalar_eq(&n, &s)); + } + + { + /* Test that fetching groups of randomly-sized bits from a scalar and recursing n(i)=b*n(i-1)+p(i) reconstructs it. */ + secp256k1_scalar_t n; + int i = 0; + secp256k1_scalar_set_int(&n, 0); + while (i < 256) { + secp256k1_scalar_t t; + int j; + int now = (secp256k1_rand32() % 15) + 1; + if (now + i > 256) { + now = 256 - i; + } + secp256k1_scalar_set_int(&t, secp256k1_scalar_get_bits_var(&s, 256 - now - i, now)); + for (j = 0; j < now; j++) { + secp256k1_scalar_add(&n, &n, &n); + } + secp256k1_scalar_add(&n, &n, &t); + i += now; + } + CHECK(secp256k1_scalar_eq(&n, &s)); + } + +#ifndef USE_NUM_NONE + { + /* Test that adding the scalars together is equal to adding their numbers together modulo the order. */ + secp256k1_num_t rnum; + secp256k1_num_t r2num; + secp256k1_scalar_t r; + secp256k1_num_add(&rnum, &snum, &s2num); + secp256k1_num_mod(&rnum, &order); + secp256k1_scalar_add(&r, &s, &s2); + secp256k1_scalar_get_num(&r2num, &r); + CHECK(secp256k1_num_eq(&rnum, &r2num)); + } + + { + /* Test that multipying the scalars is equal to multiplying their numbers modulo the order. */ + secp256k1_scalar_t r; + secp256k1_num_t r2num; + secp256k1_num_t rnum; + secp256k1_num_mul(&rnum, &snum, &s2num); + secp256k1_num_mod(&rnum, &order); + secp256k1_scalar_mul(&r, &s, &s2); + secp256k1_scalar_get_num(&r2num, &r); + CHECK(secp256k1_num_eq(&rnum, &r2num)); + /* The result can only be zero if at least one of the factors was zero. */ + CHECK(secp256k1_scalar_is_zero(&r) == (secp256k1_scalar_is_zero(&s) || secp256k1_scalar_is_zero(&s2))); + /* The results can only be equal to one of the factors if that factor was zero, or the other factor was one. */ + CHECK(secp256k1_num_eq(&rnum, &snum) == (secp256k1_scalar_is_zero(&s) || secp256k1_scalar_is_one(&s2))); + CHECK(secp256k1_num_eq(&rnum, &s2num) == (secp256k1_scalar_is_zero(&s2) || secp256k1_scalar_is_one(&s))); + } + + { + secp256k1_scalar_t neg; + secp256k1_num_t negnum; + secp256k1_num_t negnum2; + /* Check that comparison with zero matches comparison with zero on the number. */ + CHECK(secp256k1_num_is_zero(&snum) == secp256k1_scalar_is_zero(&s)); + /* Check that comparison with the half order is equal to testing for high scalar. 
*/ + CHECK(secp256k1_scalar_is_high(&s) == (secp256k1_num_cmp(&snum, &half_order) > 0)); + secp256k1_scalar_negate(&neg, &s); + secp256k1_num_sub(&negnum, &order, &snum); + secp256k1_num_mod(&negnum, &order); + /* Check that comparison with the half order is equal to testing for high scalar after negation. */ + CHECK(secp256k1_scalar_is_high(&neg) == (secp256k1_num_cmp(&negnum, &half_order) > 0)); + /* Negating should change the high property, unless the value was already zero. */ + CHECK((secp256k1_scalar_is_high(&s) == secp256k1_scalar_is_high(&neg)) == secp256k1_scalar_is_zero(&s)); + secp256k1_scalar_get_num(&negnum2, &neg); + /* Negating a scalar should be equal to (order - n) mod order on the number. */ + CHECK(secp256k1_num_eq(&negnum, &negnum2)); + secp256k1_scalar_add(&neg, &neg, &s); + /* Adding a number to its negation should result in zero. */ + CHECK(secp256k1_scalar_is_zero(&neg)); + secp256k1_scalar_negate(&neg, &neg); + /* Negating zero should still result in zero. */ + CHECK(secp256k1_scalar_is_zero(&neg)); + } + + { + /* Test secp256k1_scalar_mul_shift_var. */ + secp256k1_scalar_t r; + secp256k1_num_t one; + secp256k1_num_t rnum; + secp256k1_num_t rnum2; + unsigned char cone[1] = {0x01}; + unsigned int shift = 256 + (secp256k1_rand32() % 257); + secp256k1_scalar_mul_shift_var(&r, &s1, &s2, shift); + secp256k1_num_mul(&rnum, &s1num, &s2num); + secp256k1_num_shift(&rnum, shift - 1); + secp256k1_num_set_bin(&one, cone, 1); + secp256k1_num_add(&rnum, &rnum, &one); + secp256k1_num_shift(&rnum, 1); + secp256k1_scalar_get_num(&rnum2, &r); + CHECK(secp256k1_num_eq(&rnum, &rnum2)); + } + + { + /* test secp256k1_scalar_shr_int */ + secp256k1_scalar_t r; + int i; + int low; + random_scalar_order_test(&r); + for (i = 0; i < 100; ++i) { + int shift = 1 + (secp256k1_rand32() % 15); + int expected = r.d[0] % (1 << shift); + low = secp256k1_scalar_shr_int(&r, shift); + CHECK(expected == low); + } + } +#endif + + { + /* Test that scalar inverses are equal to the inverse of their number modulo the order. */ + if (!secp256k1_scalar_is_zero(&s)) { + secp256k1_scalar_t inv; +#ifndef USE_NUM_NONE + secp256k1_num_t invnum; + secp256k1_num_t invnum2; +#endif + secp256k1_scalar_inverse(&inv, &s); +#ifndef USE_NUM_NONE + secp256k1_num_mod_inverse(&invnum, &snum, &order); + secp256k1_scalar_get_num(&invnum2, &inv); + CHECK(secp256k1_num_eq(&invnum, &invnum2)); +#endif + secp256k1_scalar_mul(&inv, &inv, &s); + /* Multiplying a scalar with its inverse must result in one. */ + CHECK(secp256k1_scalar_is_one(&inv)); + secp256k1_scalar_inverse(&inv, &inv); + /* Inverting one must result in one. */ + CHECK(secp256k1_scalar_is_one(&inv)); + } + } + + { + /* Test commutativity of add. */ + secp256k1_scalar_t r1, r2; + secp256k1_scalar_add(&r1, &s1, &s2); + secp256k1_scalar_add(&r2, &s2, &s1); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + } + + { + secp256k1_scalar_t r1, r2; + secp256k1_scalar_t b; + int i; + /* Test add_bit. */ + int bit = secp256k1_rand32() % 256; + secp256k1_scalar_set_int(&b, 1); + CHECK(secp256k1_scalar_is_one(&b)); + for (i = 0; i < bit; i++) { + secp256k1_scalar_add(&b, &b, &b); + } + r1 = s1; + r2 = s1; + if (!secp256k1_scalar_add(&r1, &r1, &b)) { + /* No overflow happened. */ + secp256k1_scalar_cadd_bit(&r2, bit, 1); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + /* cadd is a noop when flag is zero */ + secp256k1_scalar_cadd_bit(&r2, bit, 0); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + } + } + + { + /* Test commutativity of mul. 
*/ + secp256k1_scalar_t r1, r2; + secp256k1_scalar_mul(&r1, &s1, &s2); + secp256k1_scalar_mul(&r2, &s2, &s1); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + } + + { + /* Test associativity of add. */ + secp256k1_scalar_t r1, r2; + secp256k1_scalar_add(&r1, &s1, &s2); + secp256k1_scalar_add(&r1, &r1, &s); + secp256k1_scalar_add(&r2, &s2, &s); + secp256k1_scalar_add(&r2, &s1, &r2); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + } + + { + /* Test associativity of mul. */ + secp256k1_scalar_t r1, r2; + secp256k1_scalar_mul(&r1, &s1, &s2); + secp256k1_scalar_mul(&r1, &r1, &s); + secp256k1_scalar_mul(&r2, &s2, &s); + secp256k1_scalar_mul(&r2, &s1, &r2); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + } + + { + /* Test distributitivity of mul over add. */ + secp256k1_scalar_t r1, r2, t; + secp256k1_scalar_add(&r1, &s1, &s2); + secp256k1_scalar_mul(&r1, &r1, &s); + secp256k1_scalar_mul(&r2, &s1, &s); + secp256k1_scalar_mul(&t, &s2, &s); + secp256k1_scalar_add(&r2, &r2, &t); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + } + + { + /* Test square. */ + secp256k1_scalar_t r1, r2; + secp256k1_scalar_sqr(&r1, &s1); + secp256k1_scalar_mul(&r2, &s1, &s1); + CHECK(secp256k1_scalar_eq(&r1, &r2)); + } + + { + /* Test multiplicative identity. */ + secp256k1_scalar_t r1, v1; + secp256k1_scalar_set_int(&v1,1); + secp256k1_scalar_mul(&r1, &s1, &v1); + CHECK(secp256k1_scalar_eq(&r1, &s1)); + } + + { + /* Test additive identity. */ + secp256k1_scalar_t r1, v0; + secp256k1_scalar_set_int(&v0,0); + secp256k1_scalar_add(&r1, &s1, &v0); + CHECK(secp256k1_scalar_eq(&r1, &s1)); + } + + { + /* Test zero product property. */ + secp256k1_scalar_t r1, v0; + secp256k1_scalar_set_int(&v0,0); + secp256k1_scalar_mul(&r1, &s1, &v0); + CHECK(secp256k1_scalar_eq(&r1, &v0)); + } + +} + +void run_scalar_tests(void) { + int i; + for (i = 0; i < 128 * count; i++) { + scalar_test(); + } + + { + /* (-1)+1 should be zero. */ + secp256k1_scalar_t s, o; + secp256k1_scalar_set_int(&s, 1); + CHECK(secp256k1_scalar_is_one(&s)); + secp256k1_scalar_negate(&o, &s); + secp256k1_scalar_add(&o, &o, &s); + CHECK(secp256k1_scalar_is_zero(&o)); + secp256k1_scalar_negate(&o, &o); + CHECK(secp256k1_scalar_is_zero(&o)); + } + +#ifndef USE_NUM_NONE + { + /* A scalar with value of the curve order should be 0. 
*/ + secp256k1_num_t order; + secp256k1_scalar_t zero; + unsigned char bin[32]; + int overflow = 0; + secp256k1_scalar_order_get_num(&order); + secp256k1_num_get_bin(bin, 32, &order); + secp256k1_scalar_set_b32(&zero, bin, &overflow); + CHECK(overflow == 1); + CHECK(secp256k1_scalar_is_zero(&zero)); + } +#endif +} + +/***** FIELD TESTS *****/ + +void random_fe(secp256k1_fe_t *x) { + unsigned char bin[32]; + do { + secp256k1_rand256(bin); + if (secp256k1_fe_set_b32(x, bin)) { + return; + } + } while(1); +} + +void random_fe_non_zero(secp256k1_fe_t *nz) { + int tries = 10; + while (--tries >= 0) { + random_fe(nz); + secp256k1_fe_normalize(nz); + if (!secp256k1_fe_is_zero(nz)) { + break; + } + } + /* Infinitesimal probability of spurious failure here */ + CHECK(tries >= 0); +} + +void random_fe_non_square(secp256k1_fe_t *ns) { + secp256k1_fe_t r; + random_fe_non_zero(ns); + if (secp256k1_fe_sqrt_var(&r, ns)) { + secp256k1_fe_negate(ns, ns, 1); + } +} + +int check_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { + secp256k1_fe_t an = *a; + secp256k1_fe_t bn = *b; + secp256k1_fe_normalize_weak(&an); + secp256k1_fe_normalize_var(&bn); + return secp256k1_fe_equal_var(&an, &bn); +} + +int check_fe_inverse(const secp256k1_fe_t *a, const secp256k1_fe_t *ai) { + secp256k1_fe_t x; + secp256k1_fe_t one = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1); + secp256k1_fe_mul(&x, a, ai); + return check_fe_equal(&x, &one); +} + +void run_field_convert(void) { + static const unsigned char b32[32] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, + 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x40 + }; + static const secp256k1_fe_storage_t fes = SECP256K1_FE_STORAGE_CONST( + 0x00010203UL, 0x04050607UL, 0x11121314UL, 0x15161718UL, + 0x22232425UL, 0x26272829UL, 0x33343536UL, 0x37383940UL + ); + static const secp256k1_fe_t fe = SECP256K1_FE_CONST( + 0x00010203UL, 0x04050607UL, 0x11121314UL, 0x15161718UL, + 0x22232425UL, 0x26272829UL, 0x33343536UL, 0x37383940UL + ); + secp256k1_fe_t fe2; + unsigned char b322[32]; + secp256k1_fe_storage_t fes2; + /* Check conversions to fe. */ + CHECK(secp256k1_fe_set_b32(&fe2, b32)); + CHECK(secp256k1_fe_equal_var(&fe, &fe2)); + secp256k1_fe_from_storage(&fe2, &fes); + CHECK(secp256k1_fe_equal_var(&fe, &fe2)); + /* Check conversion from fe. */ + secp256k1_fe_get_b32(b322, &fe); + CHECK(memcmp(b322, b32, 32) == 0); + secp256k1_fe_to_storage(&fes2, &fe); + CHECK(memcmp(&fes2, &fes, sizeof(fes)) == 0); +} + +int fe_memcmp(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { + secp256k1_fe_t t = *b; +#ifdef VERIFY + t.magnitude = a->magnitude; + t.normalized = a->normalized; +#endif + return memcmp(a, &t, sizeof(secp256k1_fe_t)); +} + +void run_field_misc(void) { + secp256k1_fe_t x; + secp256k1_fe_t y; + secp256k1_fe_t z; + secp256k1_fe_t q; + secp256k1_fe_t fe5 = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 5); + int i, j; + for (i = 0; i < 5*count; i++) { + secp256k1_fe_storage_t xs, ys, zs; + random_fe(&x); + random_fe_non_zero(&y); + /* Test the fe equality and comparison operations. */ + CHECK(secp256k1_fe_cmp_var(&x, &x) == 0); + CHECK(secp256k1_fe_equal_var(&x, &x)); + z = x; + secp256k1_fe_add(&z,&y); + /* Test fe conditional move; z is not normalized here. 
*/ + q = x; + secp256k1_fe_cmov(&x, &z, 0); + VERIFY_CHECK(!x.normalized && x.magnitude == z.magnitude); + secp256k1_fe_cmov(&x, &x, 1); + CHECK(fe_memcmp(&x, &z) != 0); + CHECK(fe_memcmp(&x, &q) == 0); + secp256k1_fe_cmov(&q, &z, 1); + VERIFY_CHECK(!q.normalized && q.magnitude == z.magnitude); + CHECK(fe_memcmp(&q, &z) == 0); + secp256k1_fe_normalize_var(&x); + secp256k1_fe_normalize_var(&z); + CHECK(!secp256k1_fe_equal_var(&x, &z)); + secp256k1_fe_normalize_var(&q); + secp256k1_fe_cmov(&q, &z, (i&1)); + VERIFY_CHECK(q.normalized && q.magnitude == 1); + for (j = 0; j < 6; j++) { + secp256k1_fe_negate(&z, &z, j+1); + secp256k1_fe_normalize_var(&q); + secp256k1_fe_cmov(&q, &z, (j&1)); + VERIFY_CHECK(!q.normalized && q.magnitude == (j+2)); + } + secp256k1_fe_normalize_var(&z); + /* Test storage conversion and conditional moves. */ + secp256k1_fe_to_storage(&xs, &x); + secp256k1_fe_to_storage(&ys, &y); + secp256k1_fe_to_storage(&zs, &z); + secp256k1_fe_storage_cmov(&zs, &xs, 0); + secp256k1_fe_storage_cmov(&zs, &zs, 1); + CHECK(memcmp(&xs, &zs, sizeof(xs)) != 0); + secp256k1_fe_storage_cmov(&ys, &xs, 1); + CHECK(memcmp(&xs, &ys, sizeof(xs)) == 0); + secp256k1_fe_from_storage(&x, &xs); + secp256k1_fe_from_storage(&y, &ys); + secp256k1_fe_from_storage(&z, &zs); + /* Test that mul_int, mul, and add agree. */ + secp256k1_fe_add(&y, &x); + secp256k1_fe_add(&y, &x); + z = x; + secp256k1_fe_mul_int(&z, 3); + CHECK(check_fe_equal(&y, &z)); + secp256k1_fe_add(&y, &x); + secp256k1_fe_add(&z, &x); + CHECK(check_fe_equal(&z, &y)); + z = x; + secp256k1_fe_mul_int(&z, 5); + secp256k1_fe_mul(&q, &x, &fe5); + CHECK(check_fe_equal(&z, &q)); + secp256k1_fe_negate(&x, &x, 1); + secp256k1_fe_add(&z, &x); + secp256k1_fe_add(&q, &x); + CHECK(check_fe_equal(&y, &z)); + CHECK(check_fe_equal(&q, &y)); + } +} + +void run_field_inv(void) { + secp256k1_fe_t x, xi, xii; + int i; + for (i = 0; i < 10*count; i++) { + random_fe_non_zero(&x); + secp256k1_fe_inv(&xi, &x); + CHECK(check_fe_inverse(&x, &xi)); + secp256k1_fe_inv(&xii, &xi); + CHECK(check_fe_equal(&x, &xii)); + } +} + +void run_field_inv_var(void) { + secp256k1_fe_t x, xi, xii; + int i; + for (i = 0; i < 10*count; i++) { + random_fe_non_zero(&x); + secp256k1_fe_inv_var(&xi, &x); + CHECK(check_fe_inverse(&x, &xi)); + secp256k1_fe_inv_var(&xii, &xi); + CHECK(check_fe_equal(&x, &xii)); + } +} + +void run_field_inv_all_var(void) { + secp256k1_fe_t x[16], xi[16], xii[16]; + int i; + /* Check it's safe to call for 0 elements */ + secp256k1_fe_inv_all_var(0, xi, x); + for (i = 0; i < count; i++) { + size_t j; + size_t len = (secp256k1_rand32() & 15) + 1; + for (j = 0; j < len; j++) { + random_fe_non_zero(&x[j]); + } + secp256k1_fe_inv_all_var(len, xi, x); + for (j = 0; j < len; j++) { + CHECK(check_fe_inverse(&x[j], &xi[j])); + } + secp256k1_fe_inv_all_var(len, xii, xi); + for (j = 0; j < len; j++) { + CHECK(check_fe_equal(&x[j], &xii[j])); + } + } +} + +void run_sqr(void) { + secp256k1_fe_t x, s; + + { + int i; + secp256k1_fe_set_int(&x, 1); + secp256k1_fe_negate(&x, &x, 1); + + for (i = 1; i <= 512; ++i) { + secp256k1_fe_mul_int(&x, 2); + secp256k1_fe_normalize(&x); + secp256k1_fe_sqr(&s, &x); + } + } +} + +void test_sqrt(const secp256k1_fe_t *a, const secp256k1_fe_t *k) { + secp256k1_fe_t r1, r2; + int v = secp256k1_fe_sqrt_var(&r1, a); + CHECK((v == 0) == (k == NULL)); + + if (k != NULL) { + /* Check that the returned root is +/- the given known answer */ + secp256k1_fe_negate(&r2, &r1, 1); + secp256k1_fe_add(&r1, k); secp256k1_fe_add(&r2, k); + 
secp256k1_fe_normalize(&r1); secp256k1_fe_normalize(&r2); + CHECK(secp256k1_fe_is_zero(&r1) || secp256k1_fe_is_zero(&r2)); + } +} + +void run_sqrt(void) { + secp256k1_fe_t ns, x, s, t; + int i; + + /* Check sqrt(0) is 0 */ + secp256k1_fe_set_int(&x, 0); + secp256k1_fe_sqr(&s, &x); + test_sqrt(&s, &x); + + /* Check sqrt of small squares (and their negatives) */ + for (i = 1; i <= 100; i++) { + secp256k1_fe_set_int(&x, i); + secp256k1_fe_sqr(&s, &x); + test_sqrt(&s, &x); + secp256k1_fe_negate(&t, &s, 1); + test_sqrt(&t, NULL); + } + + /* Consistency checks for large random values */ + for (i = 0; i < 10; i++) { + int j; + random_fe_non_square(&ns); + for (j = 0; j < count; j++) { + random_fe(&x); + secp256k1_fe_sqr(&s, &x); + test_sqrt(&s, &x); + secp256k1_fe_negate(&t, &s, 1); + test_sqrt(&t, NULL); + secp256k1_fe_mul(&t, &s, &ns); + test_sqrt(&t, NULL); + } + } +} + +/***** GROUP TESTS *****/ + +void ge_equals_ge(const secp256k1_ge_t *a, const secp256k1_ge_t *b) { + CHECK(a->infinity == b->infinity); + if (a->infinity) { + return; + } + CHECK(secp256k1_fe_equal_var(&a->x, &b->x)); + CHECK(secp256k1_fe_equal_var(&a->y, &b->y)); +} + +/* This compares jacobian points including their Z, not just their geometric meaning. */ +int gej_xyz_equals_gej(const secp256k1_gej_t *a, const secp256k1_gej_t *b) { + secp256k1_gej_t a2; + secp256k1_gej_t b2; + int ret = 1; + ret &= a->infinity == b->infinity; + if (ret && !a->infinity) { + a2 = *a; + b2 = *b; + secp256k1_fe_normalize(&a2.x); + secp256k1_fe_normalize(&a2.y); + secp256k1_fe_normalize(&a2.z); + secp256k1_fe_normalize(&b2.x); + secp256k1_fe_normalize(&b2.y); + secp256k1_fe_normalize(&b2.z); + ret &= secp256k1_fe_cmp_var(&a2.x, &b2.x) == 0; + ret &= secp256k1_fe_cmp_var(&a2.y, &b2.y) == 0; + ret &= secp256k1_fe_cmp_var(&a2.z, &b2.z) == 0; + } + return ret; +} + +void ge_equals_gej(const secp256k1_ge_t *a, const secp256k1_gej_t *b) { + secp256k1_fe_t z2s; + secp256k1_fe_t u1, u2, s1, s2; + CHECK(a->infinity == b->infinity); + if (a->infinity) { + return; + } + /* Check a.x * b.z^2 == b.x && a.y * b.z^3 == b.y, to avoid inverses. */ + secp256k1_fe_sqr(&z2s, &b->z); + secp256k1_fe_mul(&u1, &a->x, &z2s); + u2 = b->x; secp256k1_fe_normalize_weak(&u2); + secp256k1_fe_mul(&s1, &a->y, &z2s); secp256k1_fe_mul(&s1, &s1, &b->z); + s2 = b->y; secp256k1_fe_normalize_weak(&s2); + CHECK(secp256k1_fe_equal_var(&u1, &u2)); + CHECK(secp256k1_fe_equal_var(&s1, &s2)); +} + +void test_ge(void) { + int i, i1; +#ifdef USE_ENDOMORPHISM + int runs = 6; +#else + int runs = 4; +#endif + /* Points: (infinity, p1, p1, -p1, -p1, p2, p2, -p2, -p2, p3, p3, -p3, -p3, p4, p4, -p4, -p4). + * The second in each pair of identical points uses a random Z coordinate in the Jacobian form. + * All magnitudes are randomized. + * All 17*17 combinations of points are added to eachother, using all applicable methods. + * + * When the endomorphism code is compiled in, p5 = lambda*p1 and p6 = lambda^2*p1 are added as well. 
+ */ + secp256k1_ge_t *ge = (secp256k1_ge_t *)malloc(sizeof(secp256k1_ge_t) * (1 + 4 * runs)); + secp256k1_gej_t *gej = (secp256k1_gej_t *)malloc(sizeof(secp256k1_gej_t) * (1 + 4 * runs)); + secp256k1_fe_t *zinv = (secp256k1_fe_t *)malloc(sizeof(secp256k1_fe_t) * (1 + 4 * runs)); + secp256k1_fe_t zf; + secp256k1_fe_t zfi2, zfi3; + + secp256k1_gej_set_infinity(&gej[0]); + secp256k1_ge_clear(&ge[0]); + secp256k1_ge_set_gej_var(&ge[0], &gej[0]); + for (i = 0; i < runs; i++) { + int j; + secp256k1_ge_t g; + random_group_element_test(&g); +#ifdef USE_ENDOMORPHISM + if (i >= runs - 2) { + secp256k1_ge_mul_lambda(&g, &ge[1]); + } + if (i >= runs - 1) { + secp256k1_ge_mul_lambda(&g, &g); + } +#endif + ge[1 + 4 * i] = g; + ge[2 + 4 * i] = g; + secp256k1_ge_neg(&ge[3 + 4 * i], &g); + secp256k1_ge_neg(&ge[4 + 4 * i], &g); + secp256k1_gej_set_ge(&gej[1 + 4 * i], &ge[1 + 4 * i]); + random_group_element_jacobian_test(&gej[2 + 4 * i], &ge[2 + 4 * i]); + secp256k1_gej_set_ge(&gej[3 + 4 * i], &ge[3 + 4 * i]); + random_group_element_jacobian_test(&gej[4 + 4 * i], &ge[4 + 4 * i]); + for (j = 0; j < 4; j++) { + random_field_element_magnitude(&ge[1 + j + 4 * i].x); + random_field_element_magnitude(&ge[1 + j + 4 * i].y); + random_field_element_magnitude(&gej[1 + j + 4 * i].x); + random_field_element_magnitude(&gej[1 + j + 4 * i].y); + random_field_element_magnitude(&gej[1 + j + 4 * i].z); + } + } + + /* Compute z inverses. */ + { + secp256k1_fe_t *zs = malloc(sizeof(secp256k1_fe_t) * (1 + 4 * runs)); + for (i = 0; i < 4 * runs + 1; i++) { + if (i == 0) { + /* The point at infinity does not have a meaningful z inverse. Any should do. */ + do { + random_field_element_test(&zs[i]); + } while(secp256k1_fe_is_zero(&zs[i])); + } else { + zs[i] = gej[i].z; + } + } + secp256k1_fe_inv_all_var(4 * runs + 1, zinv, zs); + free(zs); + } + + /* Generate random zf, and zfi2 = 1/zf^2, zfi3 = 1/zf^3 */ + do { + random_field_element_test(&zf); + } while(secp256k1_fe_is_zero(&zf)); + random_field_element_magnitude(&zf); + secp256k1_fe_inv_var(&zfi3, &zf); + secp256k1_fe_sqr(&zfi2, &zfi3); + secp256k1_fe_mul(&zfi3, &zfi3, &zfi2); + + for (i1 = 0; i1 < 1 + 4 * runs; i1++) { + int i2; + for (i2 = 0; i2 < 1 + 4 * runs; i2++) { + /* Compute reference result using gej + gej (var). */ + secp256k1_gej_t refj, resj; + secp256k1_ge_t ref; + secp256k1_fe_t zr; + secp256k1_gej_add_var(&refj, &gej[i1], &gej[i2], secp256k1_gej_is_infinity(&gej[i1]) ? NULL : &zr); + /* Check Z ratio. */ + if (!secp256k1_gej_is_infinity(&gej[i1]) && !secp256k1_gej_is_infinity(&refj)) { + secp256k1_fe_t zrz; secp256k1_fe_mul(&zrz, &zr, &gej[i1].z); + CHECK(secp256k1_fe_equal_var(&zrz, &refj.z)); + } + secp256k1_ge_set_gej_var(&ref, &refj); + + /* Test gej + ge with Z ratio result (var). */ + secp256k1_gej_add_ge_var(&resj, &gej[i1], &ge[i2], secp256k1_gej_is_infinity(&gej[i1]) ? NULL : &zr); + ge_equals_gej(&ref, &resj); + if (!secp256k1_gej_is_infinity(&gej[i1]) && !secp256k1_gej_is_infinity(&resj)) { + secp256k1_fe_t zrz; secp256k1_fe_mul(&zrz, &zr, &gej[i1].z); + CHECK(secp256k1_fe_equal_var(&zrz, &resj.z)); + } + + /* Test gej + ge (var, with additional Z factor). 
*/ + { + secp256k1_ge_t ge2_zfi = ge[i2]; /* the second term with x and y rescaled for z = 1/zf */ + secp256k1_fe_mul(&ge2_zfi.x, &ge2_zfi.x, &zfi2); + secp256k1_fe_mul(&ge2_zfi.y, &ge2_zfi.y, &zfi3); + random_field_element_magnitude(&ge2_zfi.x); + random_field_element_magnitude(&ge2_zfi.y); + secp256k1_gej_add_zinv_var(&resj, &gej[i1], &ge2_zfi, &zf); + ge_equals_gej(&ref, &resj); + } + + /* Test gej + ge (const). */ + if (i2 != 0) { + /* secp256k1_gej_add_ge does not support its second argument being infinity. */ + secp256k1_gej_add_ge(&resj, &gej[i1], &ge[i2]); + ge_equals_gej(&ref, &resj); + } + + /* Test doubling (var). */ + if ((i1 == 0 && i2 == 0) || ((i1 + 3)/4 == (i2 + 3)/4 && ((i1 + 3)%4)/2 == ((i2 + 3)%4)/2)) { + secp256k1_fe_t zr2; + /* Normal doubling with Z ratio result. */ + secp256k1_gej_double_var(&resj, &gej[i1], &zr2); + ge_equals_gej(&ref, &resj); + /* Check Z ratio. */ + secp256k1_fe_mul(&zr2, &zr2, &gej[i1].z); + CHECK(secp256k1_fe_equal_var(&zr2, &resj.z)); + /* Normal doubling. */ + secp256k1_gej_double_var(&resj, &gej[i2], NULL); + ge_equals_gej(&ref, &resj); + } + + /* Test adding opposites. */ + if ((i1 == 0 && i2 == 0) || ((i1 + 3)/4 == (i2 + 3)/4 && ((i1 + 3)%4)/2 != ((i2 + 3)%4)/2)) { + CHECK(secp256k1_ge_is_infinity(&ref)); + } + + /* Test adding infinity. */ + if (i1 == 0) { + CHECK(secp256k1_ge_is_infinity(&ge[i1])); + CHECK(secp256k1_gej_is_infinity(&gej[i1])); + ge_equals_gej(&ref, &gej[i2]); + } + if (i2 == 0) { + CHECK(secp256k1_ge_is_infinity(&ge[i2])); + CHECK(secp256k1_gej_is_infinity(&gej[i2])); + ge_equals_gej(&ref, &gej[i1]); + } + } + } + + /* Test adding all points together in random order equals infinity. */ + { + secp256k1_gej_t sum = SECP256K1_GEJ_CONST_INFINITY; + secp256k1_gej_t *gej_shuffled = (secp256k1_gej_t *)malloc((4 * runs + 1) * sizeof(secp256k1_gej_t)); + for (i = 0; i < 4 * runs + 1; i++) { + gej_shuffled[i] = gej[i]; + } + for (i = 0; i < 4 * runs + 1; i++) { + int swap = i + secp256k1_rand32() % (4 * runs + 1 - i); + if (swap != i) { + secp256k1_gej_t t = gej_shuffled[i]; + gej_shuffled[i] = gej_shuffled[swap]; + gej_shuffled[swap] = t; + } + } + for (i = 0; i < 4 * runs + 1; i++) { + secp256k1_gej_add_var(&sum, &sum, &gej_shuffled[i], NULL); + } + CHECK(secp256k1_gej_is_infinity(&sum)); + free(gej_shuffled); + } + + /* Test batch gej -> ge conversion with and without known z ratios. */ + { + secp256k1_fe_t *zr = (secp256k1_fe_t *)malloc((4 * runs + 1) * sizeof(secp256k1_fe_t)); + secp256k1_ge_t *ge_set_table = (secp256k1_ge_t *)malloc((4 * runs + 1) * sizeof(secp256k1_ge_t)); + secp256k1_ge_t *ge_set_all = (secp256k1_ge_t *)malloc((4 * runs + 1) * sizeof(secp256k1_ge_t)); + for (i = 0; i < 4 * runs + 1; i++) { + /* Compute gej[i + 1].z / gez[i].z (with gej[n].z taken to be 1). */ + if (i < 4 * runs) { + secp256k1_fe_mul(&zr[i + 1], &zinv[i], &gej[i + 1].z); + } + } + secp256k1_ge_set_table_gej_var(4 * runs + 1, ge_set_table, gej, zr); + secp256k1_ge_set_all_gej_var(4 * runs + 1, ge_set_all, gej, &ctx->error_callback); + for (i = 0; i < 4 * runs + 1; i++) { + secp256k1_fe_t s; + random_fe_non_zero(&s); + secp256k1_gej_rescale(&gej[i], &s); + ge_equals_gej(&ge_set_table[i], &gej[i]); + ge_equals_gej(&ge_set_all[i], &gej[i]); + } + free(ge_set_table); + free(ge_set_all); + free(zr); + } + + free(ge); + free(gej); + free(zinv); +} + +void test_add_neg_y_diff_x(void) { + /* The point of this test is to check that we can add two points + * whose y-coordinates are negatives of each other but whose x + * coordinates differ. 
If the x-coordinates were the same, these + * points would be negatives of each other and their sum is + * infinity. This is cool because it "covers up" any degeneracy + * in the addition algorithm that would cause the xy coordinates + * of the sum to be wrong (since infinity has no xy coordinates). + * HOWEVER, if the x-coordinates are different, infinity is the + * wrong answer, and such degeneracies are exposed. This is the + * root of https://github.com/bitcoin/secp256k1/issues/257 which + * this test is a regression test for. + * + * These points were generated in sage as + * # secp256k1 params + * F = FiniteField (0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F) + * C = EllipticCurve ([F (0), F (7)]) + * G = C.lift_x(0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798) + * N = FiniteField(G.order()) + * + * # endomorphism values (lambda is 1^{1/3} in N, beta is 1^{1/3} in F) + * x = polygen(N) + * lam = (1 - x^3).roots()[1][0] + * + * # random "bad pair" + * P = C.random_element() + * Q = -int(lam) * P + * print " P: %x %x" % P.xy() + * print " Q: %x %x" % Q.xy() + * print "P + Q: %x %x" % (P + Q).xy() + */ + secp256k1_gej_t aj = SECP256K1_GEJ_CONST( + 0x8d24cd95, 0x0a355af1, 0x3c543505, 0x44238d30, + 0x0643d79f, 0x05a59614, 0x2f8ec030, 0xd58977cb, + 0x001e337a, 0x38093dcd, 0x6c0f386d, 0x0b1293a8, + 0x4d72c879, 0xd7681924, 0x44e6d2f3, 0x9190117d + ); + secp256k1_gej_t bj = SECP256K1_GEJ_CONST( + 0xc7b74206, 0x1f788cd9, 0xabd0937d, 0x164a0d86, + 0x95f6ff75, 0xf19a4ce9, 0xd013bd7b, 0xbf92d2a7, + 0xffe1cc85, 0xc7f6c232, 0x93f0c792, 0xf4ed6c57, + 0xb28d3786, 0x2897e6db, 0xbb192d0b, 0x6e6feab2 + ); + secp256k1_gej_t sumj = SECP256K1_GEJ_CONST( + 0x671a63c0, 0x3efdad4c, 0x389a7798, 0x24356027, + 0xb3d69010, 0x278625c3, 0x5c86d390, 0x184a8f7a, + 0x5f6409c2, 0x2ce01f2b, 0x511fd375, 0x25071d08, + 0xda651801, 0x70e95caf, 0x8f0d893c, 0xbed8fbbe + ); + secp256k1_ge_t b; + secp256k1_gej_t resj; + secp256k1_ge_t res; + secp256k1_ge_set_gej(&b, &bj); + + secp256k1_gej_add_var(&resj, &aj, &bj, NULL); + secp256k1_ge_set_gej(&res, &resj); + ge_equals_gej(&res, &sumj); + + secp256k1_gej_add_ge(&resj, &aj, &b); + secp256k1_ge_set_gej(&res, &resj); + ge_equals_gej(&res, &sumj); + + secp256k1_gej_add_ge_var(&resj, &aj, &b, NULL); + secp256k1_ge_set_gej(&res, &resj); + ge_equals_gej(&res, &sumj); +} + +void run_ge(void) { + int i; + for (i = 0; i < count * 32; i++) { + test_ge(); + } + test_add_neg_y_diff_x(); +} + +void test_ec_combine(void) { + secp256k1_scalar_t sum = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0); + secp256k1_pubkey_t data[6]; + const secp256k1_pubkey_t* d[6]; + secp256k1_pubkey_t sd; + secp256k1_pubkey_t sd2; + secp256k1_gej_t Qj; + secp256k1_ge_t Q; + int i; + for (i = 1; i <= 6; i++) { + secp256k1_scalar_t s; + random_scalar_order_test(&s); + secp256k1_scalar_add(&sum, &sum, &s); + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &Qj, &s); + secp256k1_ge_set_gej(&Q, &Qj); + secp256k1_pubkey_save(&data[i - 1], &Q); + d[i - 1] = &data[i - 1]; + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &Qj, &sum); + secp256k1_ge_set_gej(&Q, &Qj); + secp256k1_pubkey_save(&sd, &Q); + CHECK(secp256k1_ec_pubkey_combine(ctx, &sd2, i, d) == 1); + CHECK(memcmp(&sd, &sd2, sizeof(sd)) == 0); + } +} + +void run_ec_combine(void) { + int i; + for (i = 0; i < count * 8; i++) { + test_ec_combine(); + } +} + +/***** ECMULT TESTS *****/ + +void run_ecmult_chain(void) { + /* random starting point A (on the curve) */ + secp256k1_gej_t a = SECP256K1_GEJ_CONST( + 0x8b30bbe9, 0xae2a9906, 
0x96b22f67, 0x0709dff3, + 0x727fd8bc, 0x04d3362c, 0x6c7bf458, 0xe2846004, + 0xa357ae91, 0x5c4a6528, 0x1309edf2, 0x0504740f, + 0x0eb33439, 0x90216b4f, 0x81063cb6, 0x5f2f7e0f + ); + /* two random initial factors xn and gn */ + secp256k1_scalar_t xn = SECP256K1_SCALAR_CONST( + 0x84cc5452, 0xf7fde1ed, 0xb4d38a8c, 0xe9b1b84c, + 0xcef31f14, 0x6e569be9, 0x705d357a, 0x42985407 + ); + secp256k1_scalar_t gn = SECP256K1_SCALAR_CONST( + 0xa1e58d22, 0x553dcd42, 0xb2398062, 0x5d4c57a9, + 0x6e9323d4, 0x2b3152e5, 0xca2c3990, 0xedc7c9de + ); + /* two small multipliers to be applied to xn and gn in every iteration: */ + static const secp256k1_scalar_t xf = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0x1337); + static const secp256k1_scalar_t gf = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0x7113); + /* accumulators with the resulting coefficients to A and G */ + secp256k1_scalar_t ae = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1); + secp256k1_scalar_t ge = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0); + /* actual points */ + secp256k1_gej_t x = a; + secp256k1_gej_t x2; + int i; + + /* the point being computed */ + x = a; + for (i = 0; i < 200*count; i++) { + /* in each iteration, compute X = xn*X + gn*G; */ + secp256k1_ecmult(&ctx->ecmult_ctx, &x, &x, &xn, &gn); + /* also compute ae and ge: the actual accumulated factors for A and G */ + /* if X was (ae*A+ge*G), xn*X + gn*G results in (xn*ae*A + (xn*ge+gn)*G) */ + secp256k1_scalar_mul(&ae, &ae, &xn); + secp256k1_scalar_mul(&ge, &ge, &xn); + secp256k1_scalar_add(&ge, &ge, &gn); + /* modify xn and gn */ + secp256k1_scalar_mul(&xn, &xn, &xf); + secp256k1_scalar_mul(&gn, &gn, &gf); + + /* verify */ + if (i == 19999) { + /* expected result after 19999 iterations */ + secp256k1_gej_t rp = SECP256K1_GEJ_CONST( + 0xD6E96687, 0xF9B10D09, 0x2A6F3543, 0x9D86CEBE, + 0xA4535D0D, 0x409F5358, 0x6440BD74, 0xB933E830, + 0xB95CBCA2, 0xC77DA786, 0x539BE8FD, 0x53354D2D, + 0x3B4F566A, 0xE6580454, 0x07ED6015, 0xEE1B2A88 + ); + + secp256k1_gej_neg(&rp, &rp); + secp256k1_gej_add_var(&rp, &rp, &x, NULL); + CHECK(secp256k1_gej_is_infinity(&rp)); + } + } + /* redo the computation, but directly with the resulting ae and ge coefficients: */ + secp256k1_ecmult(&ctx->ecmult_ctx, &x2, &a, &ae, &ge); + secp256k1_gej_neg(&x2, &x2); + secp256k1_gej_add_var(&x2, &x2, &x, NULL); + CHECK(secp256k1_gej_is_infinity(&x2)); +} + +void test_point_times_order(const secp256k1_gej_t *point) { + /* X * (point + G) + (order-X) * (pointer + G) = 0 */ + secp256k1_scalar_t x; + secp256k1_scalar_t nx; + secp256k1_scalar_t zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0); + secp256k1_scalar_t one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1); + secp256k1_gej_t res1, res2; + secp256k1_ge_t res3; + unsigned char pub[65]; + int psize = 65; + random_scalar_order_test(&x); + secp256k1_scalar_negate(&nx, &x); + secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &x, &x); /* calc res1 = x * point + x * G; */ + secp256k1_ecmult(&ctx->ecmult_ctx, &res2, point, &nx, &nx); /* calc res2 = (order - x) * point + (order - x) * G; */ + secp256k1_gej_add_var(&res1, &res1, &res2, NULL); + CHECK(secp256k1_gej_is_infinity(&res1)); + CHECK(secp256k1_gej_is_valid_var(&res1) == 0); + secp256k1_ge_set_gej(&res3, &res1); + CHECK(secp256k1_ge_is_infinity(&res3)); + CHECK(secp256k1_ge_is_valid_var(&res3) == 0); + CHECK(secp256k1_eckey_pubkey_serialize(&res3, pub, &psize, 0) == 0); + psize = 65; + CHECK(secp256k1_eckey_pubkey_serialize(&res3, pub, &psize, 1) == 0); + /* check zero/one edge cases */ + 
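/* ecmult computes na*point + ng*G, so (0,0) should give infinity, (1,0) the point itself, and (0,1) the generator G. */ +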
secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &zero, &zero); + secp256k1_ge_set_gej(&res3, &res1); + CHECK(secp256k1_ge_is_infinity(&res3)); + secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &one, &zero); + secp256k1_ge_set_gej(&res3, &res1); + ge_equals_gej(&res3, point); + secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &zero, &one); + secp256k1_ge_set_gej(&res3, &res1); + ge_equals_ge(&res3, &secp256k1_ge_const_g); +} + +void run_point_times_order(void) { + int i; + secp256k1_fe_t x = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 2); + static const secp256k1_fe_t xr = SECP256K1_FE_CONST( + 0x7603CB59, 0xB0EF6C63, 0xFE608479, 0x2A0C378C, + 0xDB3233A8, 0x0F8A9A09, 0xA877DEAD, 0x31B38C45 + ); + for (i = 0; i < 500; i++) { + secp256k1_ge_t p; + if (secp256k1_ge_set_xo_var(&p, &x, 1)) { + secp256k1_gej_t j; + CHECK(secp256k1_ge_is_valid_var(&p)); + secp256k1_gej_set_ge(&j, &p); + CHECK(secp256k1_gej_is_valid_var(&j)); + test_point_times_order(&j); + } + secp256k1_fe_sqr(&x, &x); + } + secp256k1_fe_normalize_var(&x); + CHECK(secp256k1_fe_equal_var(&x, &xr)); +} + +void ecmult_const_random_mult(void) { + /* random starting point A (on the curve) */ + secp256k1_ge_t a = SECP256K1_GE_CONST( + 0x6d986544, 0x57ff52b8, 0xcf1b8126, 0x5b802a5b, + 0xa97f9263, 0xb1e88044, 0x93351325, 0x91bc450a, + 0x535c59f7, 0x325e5d2b, 0xc391fbe8, 0x3c12787c, + 0x337e4a98, 0xe82a9011, 0x0123ba37, 0xdd769c7d + ); + /* random initial factor xn */ + secp256k1_scalar_t xn = SECP256K1_SCALAR_CONST( + 0x649d4f77, 0xc4242df7, 0x7f2079c9, 0x14530327, + 0xa31b876a, 0xd2d8ce2a, 0x2236d5c6, 0xd7b2029b + ); + /* expected xn * A (from sage) */ + secp256k1_ge_t expected_b = SECP256K1_GE_CONST( + 0x23773684, 0x4d209dc7, 0x098a786f, 0x20d06fcd, + 0x070a38bf, 0xc11ac651, 0x03004319, 0x1e2a8786, + 0xed8c3b8e, 0xc06dd57b, 0xd06ea66e, 0x45492b0f, + 0xb84e4e1b, 0xfb77e21f, 0x96baae2a, 0x63dec956 + ); + secp256k1_gej_t b; + secp256k1_ecmult_const(&b, &a, &xn); + + CHECK(secp256k1_ge_is_valid_var(&a)); + ge_equals_gej(&expected_b, &b); +} + +void ecmult_const_commutativity(void) { + secp256k1_scalar_t a; + secp256k1_scalar_t b; + secp256k1_gej_t res1; + secp256k1_gej_t res2; + secp256k1_ge_t mid1; + secp256k1_ge_t mid2; + random_scalar_order_test(&a); + random_scalar_order_test(&b); + + secp256k1_ecmult_const(&res1, &secp256k1_ge_const_g, &a); + secp256k1_ecmult_const(&res2, &secp256k1_ge_const_g, &b); + secp256k1_ge_set_gej(&mid1, &res1); + secp256k1_ge_set_gej(&mid2, &res2); + secp256k1_ecmult_const(&res1, &mid1, &b); + secp256k1_ecmult_const(&res2, &mid2, &a); + secp256k1_ge_set_gej(&mid1, &res1); + secp256k1_ge_set_gej(&mid2, &res2); + ge_equals_ge(&mid1, &mid2); +} + +void ecmult_const_mult_zero_one(void) { + secp256k1_scalar_t zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0); + secp256k1_scalar_t one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1); + secp256k1_scalar_t negone; + secp256k1_gej_t res1; + secp256k1_ge_t res2; + secp256k1_ge_t point; + secp256k1_scalar_negate(&negone, &one); + + random_group_element_test(&point); + secp256k1_ecmult_const(&res1, &point, &zero); + secp256k1_ge_set_gej(&res2, &res1); + CHECK(secp256k1_ge_is_infinity(&res2)); + secp256k1_ecmult_const(&res1, &point, &one); + secp256k1_ge_set_gej(&res2, &res1); + ge_equals_ge(&res2, &point); + secp256k1_ecmult_const(&res1, &point, &negone); + secp256k1_gej_neg(&res1, &res1); + secp256k1_ge_set_gej(&res2, &res1); + ge_equals_ge(&res2, &point); +} + +void ecmult_const_chain_multiply(void) { + /* Check known result (randomly generated test problem from sage) 
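+ * Starting from G and applying ecmult_const with the same scalar 100 times, the result should equal scalar^100 * G.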
*/ + const secp256k1_scalar_t scalar = SECP256K1_SCALAR_CONST( + 0x4968d524, 0x2abf9b7a, 0x466abbcf, 0x34b11b6d, + 0xcd83d307, 0x827bed62, 0x05fad0ce, 0x18fae63b + ); + const secp256k1_gej_t expected_point = SECP256K1_GEJ_CONST( + 0x5494c15d, 0x32099706, 0xc2395f94, 0x348745fd, + 0x757ce30e, 0x4e8c90fb, 0xa2bad184, 0xf883c69f, + 0x5d195d20, 0xe191bf7f, 0x1be3e55f, 0x56a80196, + 0x6071ad01, 0xf1462f66, 0xc997fa94, 0xdb858435 + ); + secp256k1_gej_t point; + secp256k1_ge_t res; + int i; + + secp256k1_gej_set_ge(&point, &secp256k1_ge_const_g); + for (i = 0; i < 100; ++i) { + secp256k1_ge_t tmp; + secp256k1_ge_set_gej(&tmp, &point); + secp256k1_ecmult_const(&point, &tmp, &scalar); + } + secp256k1_ge_set_gej(&res, &point); + ge_equals_gej(&res, &expected_point); +} + +void run_ecmult_const_tests(void) { + ecmult_const_mult_zero_one(); + ecmult_const_random_mult(); + ecmult_const_commutativity(); + ecmult_const_chain_multiply(); +} + +void test_wnaf(const secp256k1_scalar_t *number, int w) { + secp256k1_scalar_t x, two, t; + int wnaf[256]; + int zeroes = -1; + int i; + int bits; + secp256k1_scalar_set_int(&x, 0); + secp256k1_scalar_set_int(&two, 2); + bits = secp256k1_ecmult_wnaf(wnaf, 256, number, w); + CHECK(bits <= 256); + for (i = bits-1; i >= 0; i--) { + int v = wnaf[i]; + secp256k1_scalar_mul(&x, &x, &two); + if (v) { + CHECK(zeroes == -1 || zeroes >= w-1); /* check that distance between non-zero elements is at least w-1 */ + zeroes=0; + CHECK((v & 1) == 1); /* check non-zero elements are odd */ + CHECK(v <= (1 << (w-1)) - 1); /* check range below */ + CHECK(v >= -(1 << (w-1)) - 1); /* check range above */ + } else { + CHECK(zeroes != -1); /* check that no unnecessary zero padding exists */ + zeroes++; + } + if (v >= 0) { + secp256k1_scalar_set_int(&t, v); + } else { + secp256k1_scalar_set_int(&t, -v); + secp256k1_scalar_negate(&t, &t); + } + secp256k1_scalar_add(&x, &x, &t); + } + CHECK(secp256k1_scalar_eq(&x, number)); /* check that wnaf represents number */ +} + +void test_constant_wnaf_negate(const secp256k1_scalar_t *number) { + secp256k1_scalar_t neg1 = *number; + secp256k1_scalar_t neg2 = *number; + int sign1 = 1; + int sign2 = 1; + + if (!secp256k1_scalar_get_bits(&neg1, 0, 1)) { + secp256k1_scalar_negate(&neg1, &neg1); + sign1 = -1; + } + sign2 = secp256k1_scalar_cond_negate(&neg2, secp256k1_scalar_is_even(&neg2)); + CHECK(sign1 == sign2); + CHECK(secp256k1_scalar_eq(&neg1, &neg2)); +} + +void test_constant_wnaf(const secp256k1_scalar_t *number, int w) { + secp256k1_scalar_t x, shift; + int wnaf[256] = {0}; + int i; +#ifdef USE_ENDOMORPHISM + int skew; +#endif + secp256k1_scalar_t num = *number; + + secp256k1_scalar_set_int(&x, 0); + secp256k1_scalar_set_int(&shift, 1 << w); + /* With USE_ENDOMORPHISM on we only consider 128-bit numbers */ +#ifdef USE_ENDOMORPHISM + for (i = 0; i < 16; ++i) + secp256k1_scalar_shr_int(&num, 8); + skew = secp256k1_wnaf_const(wnaf, num, w); +#else + secp256k1_wnaf_const(wnaf, num, w); +#endif + + for (i = WNAF_SIZE(w); i >= 0; --i) { + secp256k1_scalar_t t; + int v = wnaf[i]; + CHECK(v != 0); /* check nonzero */ + CHECK(v & 1); /* check parity */ + CHECK(v > -(1 << w)); /* check range above */ + CHECK(v < (1 << w)); /* check range below */ + + secp256k1_scalar_mul(&x, &x, &shift); + if (v >= 0) { + secp256k1_scalar_set_int(&t, v); + } else { + secp256k1_scalar_set_int(&t, -v); + secp256k1_scalar_negate(&t, &t); + } + secp256k1_scalar_add(&x, &x, &t); + } +#ifdef USE_ENDOMORPHISM + /* Skew num because when encoding 128-bit numbers as odd we use an 
offset */ + secp256k1_scalar_cadd_bit(&num, skew == 2, 1); +#endif + CHECK(secp256k1_scalar_eq(&x, &num)); +} + +void run_wnaf(void) { + int i; + secp256k1_scalar_t n = {{0}}; + + /* Sanity check: 1 and 2 are the smallest odd and even numbers and should + * have easier-to-diagnose failure modes */ + n.d[0] = 1; + test_constant_wnaf(&n, 4); + n.d[0] = 2; + test_constant_wnaf(&n, 4); + /* Random tests */ + for (i = 0; i < count; i++) { + random_scalar_order(&n); + test_wnaf(&n, 4+(i%10)); + test_constant_wnaf_negate(&n); + test_constant_wnaf(&n, 4 + (i % 10)); + } +} + +void test_ecmult_constants(void) { + /* Test ecmult_gen() for [0..36) and [order-36..0). */ + secp256k1_scalar_t x; + secp256k1_gej_t r; + secp256k1_ge_t ng; + int i; + int j; + secp256k1_ge_neg(&ng, &secp256k1_ge_const_g); + for (i = 0; i < 36; i++ ) { + secp256k1_scalar_set_int(&x, i); + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &r, &x); + for (j = 0; j < i; j++) { + if (j == i - 1) { + ge_equals_gej(&secp256k1_ge_const_g, &r); + } + secp256k1_gej_add_ge(&r, &r, &ng); + } + CHECK(secp256k1_gej_is_infinity(&r)); + } + for (i = 1; i <= 36; i++ ) { + secp256k1_scalar_set_int(&x, i); + secp256k1_scalar_negate(&x, &x); + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &r, &x); + for (j = 0; j < i; j++) { + if (j == i - 1) { + ge_equals_gej(&ng, &r); + } + secp256k1_gej_add_ge(&r, &r, &secp256k1_ge_const_g); + } + CHECK(secp256k1_gej_is_infinity(&r)); + } +} + +void run_ecmult_constants(void) { + test_ecmult_constants(); +} + +void test_ecmult_gen_blind(void) { + /* Test ecmult_gen() blinding and confirm that the blinding changes, the affline points match, and the z's don't match. */ + secp256k1_scalar_t key; + secp256k1_scalar_t b; + unsigned char seed32[32]; + secp256k1_gej_t pgej; + secp256k1_gej_t pgej2; + secp256k1_gej_t i; + secp256k1_ge_t pge; + random_scalar_order_test(&key); + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &pgej, &key); + secp256k1_rand256(seed32); + b = ctx->ecmult_gen_ctx.blind; + i = ctx->ecmult_gen_ctx.initial; + secp256k1_ecmult_gen_blind(&ctx->ecmult_gen_ctx, seed32); + CHECK(!secp256k1_scalar_eq(&b, &ctx->ecmult_gen_ctx.blind)); + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &pgej2, &key); + CHECK(!gej_xyz_equals_gej(&pgej, &pgej2)); + CHECK(!gej_xyz_equals_gej(&i, &ctx->ecmult_gen_ctx.initial)); + secp256k1_ge_set_gej(&pge, &pgej); + ge_equals_gej(&pge, &pgej2); +} + +void test_ecmult_gen_blind_reset(void) { + /* Test ecmult_gen() blinding reset and confirm that the blinding is consistent. 
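+ * Passing a NULL seed should reset the blinding to its fixed default state, so two consecutive resets leave blind and initial identical.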
*/ + secp256k1_scalar_t b; + secp256k1_gej_t initial; + secp256k1_ecmult_gen_blind(&ctx->ecmult_gen_ctx, 0); + b = ctx->ecmult_gen_ctx.blind; + initial = ctx->ecmult_gen_ctx.initial; + secp256k1_ecmult_gen_blind(&ctx->ecmult_gen_ctx, 0); + CHECK(secp256k1_scalar_eq(&b, &ctx->ecmult_gen_ctx.blind)); + CHECK(gej_xyz_equals_gej(&initial, &ctx->ecmult_gen_ctx.initial)); +} + +void run_ecmult_gen_blind(void) { + int i; + test_ecmult_gen_blind_reset(); + for (i = 0; i < 10; i++) { + test_ecmult_gen_blind(); + } +} + +#ifdef USE_ENDOMORPHISM +/***** ENDOMORPHISH TESTS *****/ +void test_scalar_split(void) { + secp256k1_scalar_t full; + secp256k1_scalar_t s1, slam; + const unsigned char zero[32] = {0}; + unsigned char tmp[32]; + + random_scalar_order_test(&full); + secp256k1_scalar_split_lambda(&s1, &slam, &full); + + /* check that both are <= 128 bits in size */ + if (secp256k1_scalar_is_high(&s1)) + secp256k1_scalar_negate(&s1, &s1); + if (secp256k1_scalar_is_high(&slam)) + secp256k1_scalar_negate(&slam, &slam); + + secp256k1_scalar_get_b32(tmp, &s1); + CHECK(memcmp(zero, tmp, 16) == 0); + secp256k1_scalar_get_b32(tmp, &slam); + CHECK(memcmp(zero, tmp, 16) == 0); +} + +void run_endomorphism_tests(void) { + test_scalar_split(); +} +#endif + +void random_sign(secp256k1_scalar_t *sigr, secp256k1_scalar_t *sigs, const secp256k1_scalar_t *key, const secp256k1_scalar_t *msg, int *recid) { + secp256k1_scalar_t nonce; + do { + random_scalar_order_test(&nonce); + } while(!secp256k1_ecdsa_sig_sign(&ctx->ecmult_gen_ctx, sigr, sigs, key, msg, &nonce, recid)); +} + +void test_ecdsa_sign_verify(void) { + secp256k1_gej_t pubj; + secp256k1_ge_t pub; + secp256k1_scalar_t one; + secp256k1_scalar_t msg, key; + secp256k1_scalar_t sigr, sigs; + int recid; + int getrec; + random_scalar_order_test(&msg); + random_scalar_order_test(&key); + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &pubj, &key); + secp256k1_ge_set_gej(&pub, &pubj); + getrec = secp256k1_rand32()&1; + random_sign(&sigr, &sigs, &key, &msg, getrec?&recid:NULL); + if (getrec) { + CHECK(recid >= 0 && recid < 4); + } + CHECK(secp256k1_ecdsa_sig_verify(&ctx->ecmult_ctx, &sigr, &sigs, &pub, &msg)); + secp256k1_scalar_set_int(&one, 1); + secp256k1_scalar_add(&msg, &msg, &one); + CHECK(!secp256k1_ecdsa_sig_verify(&ctx->ecmult_ctx, &sigr, &sigs, &pub, &msg)); +} + +void run_ecdsa_sign_verify(void) { + int i; + for (i = 0; i < 10*count; i++) { + test_ecdsa_sign_verify(); + } +} + +/** Dummy nonce generation function that just uses a precomputed nonce, and fails if it is not accepted. Use only for testing. */ +static int precomputed_nonce_function(unsigned char *nonce32, const unsigned char *msg32, const unsigned char *key32, const unsigned char *algo16, unsigned int counter, const void *data) { + (void)msg32; + (void)key32; + (void)algo16; + memcpy(nonce32, data, 32); + return (counter == 0); +} + +static int nonce_function_test_fail(unsigned char *nonce32, const unsigned char *msg32, const unsigned char *key32, const unsigned char *algo16, unsigned int counter, const void *data) { + /* Dummy nonce generator that has a fatal error on the first counter value. 
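+ * Returning 0 on the first counter reports failure to the signer; later counters defer to RFC6979 with the counter shifted down by one.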
*/ + if (counter == 0) { + return 0; + } + return nonce_function_rfc6979(nonce32, msg32, key32, algo16, counter - 1, data); +} + +static int nonce_function_test_retry(unsigned char *nonce32, const unsigned char *msg32, const unsigned char *key32, const unsigned char *algo16, unsigned int counter, const void *data) { + /* Dummy nonce generator that produces unacceptable nonces for the first several counter values. */ + if (counter < 3) { + memset(nonce32, counter==0 ? 0 : 255, 32); + if (counter == 2) { + nonce32[31]--; + } + return 1; + } + if (counter < 5) { + static const unsigned char order[] = { + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE, + 0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B, + 0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41 + }; + memcpy(nonce32, order, 32); + if (counter == 4) { + nonce32[31]++; + } + return 1; + } + /* Retry rate of 6979 is negligible esp. as we only call this in determinstic tests. */ + /* If someone does fine a case where it retries for secp256k1, we'd like to know. */ + if (counter > 5) { + return 0; + } + return nonce_function_rfc6979(nonce32, msg32, key32, algo16, counter - 5, data); +} + +int is_empty_signature(const secp256k1_ecdsa_signature_t *sig) { + static const unsigned char res[sizeof(secp256k1_ecdsa_signature_t)] = {0}; + return memcmp(sig, res, sizeof(secp256k1_ecdsa_signature_t)) == 0; +} + +void test_ecdsa_end_to_end(void) { + unsigned char extra[32] = {0x00}; + unsigned char privkey[32]; + unsigned char message[32]; + unsigned char privkey2[32]; + secp256k1_ecdsa_signature_t signature[5]; + unsigned char sig[74]; + int siglen = 74; + unsigned char pubkeyc[65]; + int pubkeyclen = 65; + secp256k1_pubkey_t pubkey; + secp256k1_pubkey_t recpubkey; + unsigned char seckey[300]; + int recid = 0; + int seckeylen = 300; + + /* Generate a random key and message. */ + { + secp256k1_scalar_t msg, key; + random_scalar_order_test(&msg); + random_scalar_order_test(&key); + secp256k1_scalar_get_b32(privkey, &key); + secp256k1_scalar_get_b32(message, &msg); + } + + /* Construct and verify corresponding public key. */ + CHECK(secp256k1_ec_seckey_verify(ctx, privkey) == 1); + CHECK(secp256k1_ec_pubkey_create(ctx, &pubkey, privkey) == 1); + + /* Verify exporting and importing public key. */ + CHECK(secp256k1_ec_pubkey_serialize(ctx, pubkeyc, &pubkeyclen, &pubkey, secp256k1_rand32() % 2) == 1); + memset(&pubkey, 0, sizeof(pubkey)); + CHECK(secp256k1_ec_pubkey_parse(ctx, &pubkey, pubkeyc, pubkeyclen) == 1); + + /* Verify private key import and export. */ + CHECK(secp256k1_ec_privkey_export(ctx, privkey, seckey, &seckeylen, secp256k1_rand32() % 2) == 1); + CHECK(secp256k1_ec_privkey_import(ctx, privkey2, seckey, seckeylen) == 1); + CHECK(memcmp(privkey, privkey2, 32) == 0); + + /* Optionally tweak the keys using addition. */ + if (secp256k1_rand32() % 3 == 0) { + int ret1; + int ret2; + unsigned char rnd[32]; + secp256k1_pubkey_t pubkey2; + secp256k1_rand256_test(rnd); + ret1 = secp256k1_ec_privkey_tweak_add(ctx, privkey, rnd); + ret2 = secp256k1_ec_pubkey_tweak_add(ctx, &pubkey, rnd); + CHECK(ret1 == ret2); + if (ret1 == 0) { + return; + } + CHECK(secp256k1_ec_pubkey_create(ctx, &pubkey2, privkey) == 1); + CHECK(memcmp(&pubkey, &pubkey2, sizeof(pubkey)) == 0); + } + + /* Optionally tweak the keys using multiplication. 
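+ * As with the additive tweak above, tweaking the secret key and the public key by the same factor must stay consistent: re-deriving the public key from the tweaked secret key should match the directly tweaked public key.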
*/ + if (secp256k1_rand32() % 3 == 0) { + int ret1; + int ret2; + unsigned char rnd[32]; + secp256k1_pubkey_t pubkey2; + secp256k1_rand256_test(rnd); + ret1 = secp256k1_ec_privkey_tweak_mul(ctx, privkey, rnd); + ret2 = secp256k1_ec_pubkey_tweak_mul(ctx, &pubkey, rnd); + CHECK(ret1 == ret2); + if (ret1 == 0) { + return; + } + CHECK(secp256k1_ec_pubkey_create(ctx, &pubkey2, privkey) == 1); + CHECK(memcmp(&pubkey, &pubkey2, sizeof(pubkey)) == 0); + } + + /* Sign. */ + CHECK(secp256k1_ecdsa_sign(ctx, message, &signature[0], privkey, NULL, NULL) == 1); + CHECK(secp256k1_ecdsa_sign(ctx, message, &signature[4], privkey, NULL, NULL) == 1); + CHECK(secp256k1_ecdsa_sign(ctx, message, &signature[1], privkey, NULL, extra) == 1); + extra[31] = 1; + CHECK(secp256k1_ecdsa_sign(ctx, message, &signature[2], privkey, NULL, extra) == 1); + extra[31] = 0; + extra[0] = 1; + CHECK(secp256k1_ecdsa_sign(ctx, message, &signature[3], privkey, NULL, extra) == 1); + CHECK(memcmp(&signature[0], &signature[4], sizeof(signature[0])) == 0); + CHECK(memcmp(&signature[0], &signature[1], sizeof(signature[0])) != 0); + CHECK(memcmp(&signature[0], &signature[2], sizeof(signature[0])) != 0); + CHECK(memcmp(&signature[0], &signature[3], sizeof(signature[0])) != 0); + CHECK(memcmp(&signature[1], &signature[2], sizeof(signature[0])) != 0); + CHECK(memcmp(&signature[1], &signature[3], sizeof(signature[0])) != 0); + CHECK(memcmp(&signature[2], &signature[3], sizeof(signature[0])) != 0); + /* Verify. */ + CHECK(secp256k1_ecdsa_verify(ctx, message, &signature[0], &pubkey) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, message, &signature[1], &pubkey) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, message, &signature[2], &pubkey) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, message, &signature[3], &pubkey) == 1); + + /* Serialize/parse DER and verify again */ + CHECK(secp256k1_ecdsa_signature_serialize_der(ctx, sig, &siglen, &signature[0]) == 1); + memset(&signature[0], 0, sizeof(signature[0])); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &signature[0], sig, siglen) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, message, &signature[0], &pubkey) == 1); + /* Serialize/destroy/parse DER and verify again. */ + CHECK(secp256k1_ecdsa_signature_serialize_der(ctx, sig, &siglen, &signature[0]) == 1); + sig[secp256k1_rand32() % siglen] += 1 + (secp256k1_rand32() % 255); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &signature[0], sig, siglen) == 0 || + secp256k1_ecdsa_verify(ctx, message, &signature[0], &pubkey) == 0); + + /* Serialize/parse compact (without recovery id) and verify again. */ + CHECK(secp256k1_ecdsa_signature_serialize_compact(ctx, sig, &recid, &signature[4]) == 1); + CHECK(secp256k1_ecdsa_signature_serialize_compact(ctx, sig, NULL, &signature[4]) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, message, &signature[4], &pubkey) == 1); + memset(&signature[4], 0, sizeof(signature[4])); + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &signature[4], sig, -1) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, message, &signature[4], &pubkey) == 1); + /* Parse compact (with recovery id) and recover. */ + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &signature[4], sig, recid) == 1); + CHECK(secp256k1_ecdsa_recover(ctx, message, &signature[4], &recpubkey) == 1); + CHECK(memcmp(&pubkey, &recpubkey, sizeof(pubkey)) == 0); + /* Serialize/destroy/parse signature and verify again. 
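+ * Corrupting a single byte of the 64-byte compact encoding should make verification fail, and recovery should either fail or return a different key.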
*/ + CHECK(secp256k1_ecdsa_signature_serialize_compact(ctx, sig, &recid, &signature[4]) == 1); + sig[secp256k1_rand32() % 64] += 1 + (secp256k1_rand32() % 255); + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &signature[4], sig, recid) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, message, &signature[4], &pubkey) == 0); + /* Recover again */ + CHECK(secp256k1_ecdsa_recover(ctx, message, &signature[4], &recpubkey) == 0 || + memcmp(&pubkey, &recpubkey, sizeof(pubkey)) != 0); +} + +void test_random_pubkeys(void) { + secp256k1_ge_t elem; + secp256k1_ge_t elem2; + unsigned char in[65]; + /* Generate some randomly sized pubkeys. */ + uint32_t r = secp256k1_rand32(); + int len = (r & 3) == 0 ? 65 : 33; + r>>=2; + if ((r & 3) == 0) { + len = (r & 252) >> 3; + } + r>>=8; + if (len == 65) { + in[0] = (r & 2) ? 4 : (r & 1? 6 : 7); + } else { + in[0] = (r & 1) ? 2 : 3; + } + r>>=2; + if ((r & 7) == 0) { + in[0] = (r & 2040) >> 3; + } + r>>=11; + if (len > 1) { + secp256k1_rand256(&in[1]); + } + if (len > 33) { + secp256k1_rand256(&in[33]); + } + if (secp256k1_eckey_pubkey_parse(&elem, in, len)) { + unsigned char out[65]; + unsigned char firstb; + int res; + int size = len; + firstb = in[0]; + /* If the pubkey can be parsed, it should round-trip... */ + CHECK(secp256k1_eckey_pubkey_serialize(&elem, out, &size, len == 33)); + CHECK(size == len); + CHECK(memcmp(&in[1], &out[1], len-1) == 0); + /* ... except for the type of hybrid inputs. */ + if ((in[0] != 6) && (in[0] != 7)) { + CHECK(in[0] == out[0]); + } + size = 65; + CHECK(secp256k1_eckey_pubkey_serialize(&elem, in, &size, 0)); + CHECK(size == 65); + CHECK(secp256k1_eckey_pubkey_parse(&elem2, in, size)); + ge_equals_ge(&elem,&elem2); + /* Check that the X9.62 hybrid type is checked. */ + in[0] = (r & 1) ? 6 : 7; + res = secp256k1_eckey_pubkey_parse(&elem2, in, size); + if (firstb == 2 || firstb == 3) { + if (in[0] == firstb + 4) { + CHECK(res); + } else { + CHECK(!res); + } + } + if (res) { + ge_equals_ge(&elem,&elem2); + CHECK(secp256k1_eckey_pubkey_serialize(&elem, out, &size, 0)); + CHECK(memcmp(&in[1], &out[1], 64) == 0); + } + } +} + +void run_random_pubkeys(void) { + int i; + for (i = 0; i < 10*count; i++) { + test_random_pubkeys(); + } +} + +void run_ecdsa_end_to_end(void) { + int i; + for (i = 0; i < 64*count; i++) { + test_ecdsa_end_to_end(); + } +} + +/* Tests several edge cases. */ +void test_ecdsa_edge_cases(void) { + const unsigned char msg32[32] = { + 'T', 'h', 'i', 's', ' ', 'i', 's', ' ', + 'a', ' ', 'v', 'e', 'r', 'y', ' ', 's', + 'e', 'c', 'r', 'e', 't', ' ', 'm', 'e', + 's', 's', 'a', 'g', 'e', '.', '.', '.' + }; + const unsigned char sig64[64] = { + /* Generated by signing the above message with nonce 'This is the nonce we will use...' + * and secret key 0 (which is not valid), resulting in recid 0. */ + 0x67, 0xCB, 0x28, 0x5F, 0x9C, 0xD1, 0x94, 0xE8, + 0x40, 0xD6, 0x29, 0x39, 0x7A, 0xF5, 0x56, 0x96, + 0x62, 0xFD, 0xE4, 0x46, 0x49, 0x99, 0x59, 0x63, + 0x17, 0x9A, 0x7D, 0xD1, 0x7B, 0xD2, 0x35, 0x32, + 0x4B, 0x1B, 0x7D, 0xF3, 0x4C, 0xE1, 0xF6, 0x8E, + 0x69, 0x4F, 0xF6, 0xF1, 0x1A, 0xC7, 0x51, 0xDD, + 0x7D, 0xD7, 0x3E, 0x38, 0x7E, 0xE4, 0xFC, 0x86, + 0x6E, 0x1B, 0xE8, 0xEC, 0xC7, 0xDD, 0x95, 0x57 + }; + secp256k1_pubkey_t pubkey; + int t; + /* signature (r,s) = (4,4), which can be recovered with all 4 recids. 
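+ * Because r = 4 is tiny, both x = r and x = r + order are valid field elements, which is why all four recovery ids can succeed below.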
*/ + const unsigned char sigb64[64] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + }; + secp256k1_pubkey_t pubkeyb; + secp256k1_ecdsa_signature_t sig; + int recid; + + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &sig, sig64, 0)); + CHECK(!secp256k1_ecdsa_recover(ctx, msg32, &sig, &pubkey)); + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &sig, sig64, 1)); + CHECK(secp256k1_ecdsa_recover(ctx, msg32, &sig, &pubkey)); + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &sig, sig64, 2)); + CHECK(!secp256k1_ecdsa_recover(ctx, msg32, &sig, &pubkey)); + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &sig, sig64, 3)); + CHECK(!secp256k1_ecdsa_recover(ctx, msg32, &sig, &pubkey)); + + for (recid = 0; recid < 4; recid++) { + int i; + int recid2; + /* (4,4) encoded in DER. */ + unsigned char sigbder[8] = {0x30, 0x06, 0x02, 0x01, 0x04, 0x02, 0x01, 0x04}; + unsigned char sigcder_zr[7] = {0x30, 0x05, 0x02, 0x00, 0x02, 0x01, 0x01}; + unsigned char sigcder_zs[7] = {0x30, 0x05, 0x02, 0x01, 0x01, 0x02, 0x00}; + unsigned char sigbderalt1[39] = { + 0x30, 0x25, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, 0x02, 0x01, 0x04, + }; + unsigned char sigbderalt2[39] = { + 0x30, 0x25, 0x02, 0x01, 0x04, 0x02, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + }; + unsigned char sigbderalt3[40] = { + 0x30, 0x26, 0x02, 0x21, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x02, 0x01, 0x04, + }; + unsigned char sigbderalt4[40] = { + 0x30, 0x26, 0x02, 0x01, 0x04, 0x02, 0x21, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + }; + /* (order + r,4) encoded in DER. */ + unsigned char sigbderlong[40] = { + 0x30, 0x26, 0x02, 0x21, 0x00, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xBA, 0xAE, 0xDC, + 0xE6, 0xAF, 0x48, 0xA0, 0x3B, 0xBF, 0xD2, 0x5E, + 0x8C, 0xD0, 0x36, 0x41, 0x45, 0x02, 0x01, 0x04 + }; + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &sig, sigb64, recid) == 1); + CHECK(secp256k1_ecdsa_recover(ctx, msg32, &sig, &pubkeyb) == 1); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbder, sizeof(sigbder)) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyb) == 1); + for (recid2 = 0; recid2 < 4; recid2++) { + secp256k1_pubkey_t pubkey2b; + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &sig, sigb64, recid2) == 1); + CHECK(secp256k1_ecdsa_recover(ctx, msg32, &sig, &pubkey2b) == 1); + /* Verifying with (order + r,4) should always fail. 
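+ * The DER parser is expected to reject this encoding, since its r value lies above the group order.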
*/ + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbderlong, sizeof(sigbderlong)) == 0); + } + /* DER parsing tests. */ + /* Zero length r/s. */ + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigcder_zr, sizeof(sigcder_zr)) == 0); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigcder_zs, sizeof(sigcder_zs)) == 0); + /* Leading zeros. */ + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbderalt1, sizeof(sigbderalt1)) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyb) == 1); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbderalt2, sizeof(sigbderalt2)) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyb) == 1); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbderalt3, sizeof(sigbderalt3)) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyb) == 1); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbderalt4, sizeof(sigbderalt4)) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyb) == 1); + sigbderalt3[4] = 1; + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbderalt3, sizeof(sigbderalt3)) == 0); + sigbderalt4[7] = 1; + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbderalt4, sizeof(sigbderalt4)) == 0); + /* Damage signature. */ + sigbder[7]++; + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbder, sizeof(sigbder)) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyb) == 0); + sigbder[7]--; + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbder, 6) == 0); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbder, sizeof(sigbder) - 1) == 0); + for(i = 0; i < 8; i++) { + int c; + unsigned char orig = sigbder[i]; + /*Try every single-byte change.*/ + for (c = 0; c < 256; c++) { + if (c == orig ) { + continue; + } + sigbder[i] = c; + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigbder, sizeof(sigbder)) == 0 || secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyb) == 0); + } + sigbder[i] = orig; + } + } + + /* Test the case where ECDSA recomputes a point that is infinity. */ + { + secp256k1_gej_t keyj; + secp256k1_ge_t key; + secp256k1_scalar_t msg; + secp256k1_scalar_t sr, ss; + secp256k1_scalar_set_int(&ss, 1); + secp256k1_scalar_negate(&ss, &ss); + secp256k1_scalar_inverse(&ss, &ss); + secp256k1_scalar_set_int(&sr, 1); + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &keyj, &sr); + secp256k1_ge_set_gej(&key, &keyj); + msg = ss; + CHECK(secp256k1_ecdsa_sig_verify(&ctx->ecmult_ctx, &sr, &ss, &key, &msg) == 0); + } + + /* Test r/s equal to zero */ + { + /* (1,1) encoded in DER. 
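+ * Below, r and s are zeroed in turn (in both the DER and compact encodings) and recovery/verification must fail, since neither component of a valid signature may be zero.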
*/ + unsigned char sigcder[8] = {0x30, 0x06, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01}; + unsigned char sigc64[64] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + }; + secp256k1_pubkey_t pubkeyc; + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &sig, sigc64, 0) == 1); + CHECK(secp256k1_ecdsa_recover(ctx, msg32, &sig, &pubkeyc) == 1); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigcder, sizeof(sigcder)) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyc) == 1); + sigcder[4] = 0; + sigc64[31] = 0; + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &sig, sigc64, 0) == 1); + CHECK(secp256k1_ecdsa_recover(ctx, msg32, &sig, &pubkeyb) == 0); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigcder, sizeof(sigcder)) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyc) == 0); + sigcder[4] = 1; + sigcder[7] = 0; + sigc64[31] = 1; + sigc64[63] = 0; + CHECK(secp256k1_ecdsa_signature_parse_compact(ctx, &sig, sigc64, 0) == 1); + CHECK(secp256k1_ecdsa_recover(ctx, msg32, &sig, &pubkeyb) == 0); + CHECK(secp256k1_ecdsa_signature_parse_der(ctx, &sig, sigcder, sizeof(sigcder)) == 1); + CHECK(secp256k1_ecdsa_verify(ctx, msg32, &sig, &pubkeyc) == 0); + } + + /*Signature where s would be zero.*/ + { + unsigned char signature[72]; + int siglen; + const unsigned char nonce[32] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + }; + static const unsigned char nonce2[32] = { + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE, + 0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B, + 0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x40 + }; + const unsigned char key[32] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + }; + unsigned char msg[32] = { + 0x86, 0x41, 0x99, 0x81, 0x06, 0x23, 0x44, 0x53, + 0xaa, 0x5f, 0x9d, 0x6a, 0x31, 0x78, 0xf4, 0xf7, + 0xb8, 0x12, 0xe0, 0x0b, 0x81, 0x7a, 0x77, 0x62, + 0x65, 0xdf, 0xdd, 0x31, 0xb9, 0x3e, 0x29, 0xa9, + }; + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig, key, precomputed_nonce_function, nonce) == 0); + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig, key, precomputed_nonce_function, nonce2) == 0); + msg[31] = 0xaa; + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig, key, precomputed_nonce_function, nonce) == 1); + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig, key, precomputed_nonce_function, nonce2) == 1); + siglen = 72; + CHECK(secp256k1_ecdsa_signature_serialize_der(ctx, signature, &siglen, &sig) == 1); + siglen = 10; + CHECK(secp256k1_ecdsa_signature_serialize_der(ctx, signature, &siglen, &sig) == 0); + } + + /* Nonce function corner cases. */ + for (t = 0; t < 2; t++) { + static const unsigned char zero[32] = {0x00}; + int i; + unsigned char key[32]; + unsigned char msg[32]; + secp256k1_ecdsa_signature_t sig2; + secp256k1_scalar_t sr[512], ss; + const unsigned char *extra; + extra = t == 0 ? 
NULL : zero; + memset(msg, 0, 32); + msg[31] = 1; + /* High key results in signature failure. */ + memset(key, 0xFF, 32); + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig, key, NULL, extra) == 0); + CHECK(is_empty_signature(&sig)); + /* Zero key results in signature failure. */ + memset(key, 0, 32); + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig, key, NULL, extra) == 0); + CHECK(is_empty_signature(&sig)); + /* Nonce function failure results in signature failure. */ + key[31] = 1; + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig, key, nonce_function_test_fail, extra) == 0); + CHECK(is_empty_signature(&sig)); + /* The retry loop successfully makes its way to the first good value. */ + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig, key, nonce_function_test_retry, extra) == 1); + CHECK(!is_empty_signature(&sig)); + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig2, key, nonce_function_rfc6979, extra) == 1); + CHECK(!is_empty_signature(&sig2)); + CHECK(memcmp(&sig, &sig2, sizeof(sig)) == 0); + /* The default nonce function is determinstic. */ + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig2, key, NULL, extra) == 1); + CHECK(!is_empty_signature(&sig2)); + CHECK(memcmp(&sig, &sig2, sizeof(sig)) == 0); + /* The default nonce function changes output with different messages. */ + for(i = 0; i < 256; i++) { + int j; + msg[0] = i; + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig2, key, NULL, extra) == 1); + CHECK(!is_empty_signature(&sig2)); + secp256k1_ecdsa_signature_load(ctx, &sr[i], &ss, NULL, &sig2); + for (j = 0; j < i; j++) { + CHECK(!secp256k1_scalar_eq(&sr[i], &sr[j])); + } + } + msg[0] = 0; + msg[31] = 2; + /* The default nonce function changes output with different keys. */ + for(i = 256; i < 512; i++) { + int j; + key[0] = i - 256; + CHECK(secp256k1_ecdsa_sign(ctx, msg, &sig2, key, NULL, extra) == 1); + CHECK(!is_empty_signature(&sig2)); + secp256k1_ecdsa_signature_load(ctx, &sr[i], &ss, NULL, &sig2); + for (j = 0; j < i; j++) { + CHECK(!secp256k1_scalar_eq(&sr[i], &sr[j])); + } + } + key[0] = 0; + } + + /* Privkey export where pubkey is the point at infinity. 
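+ * The secret key used below is exactly the curve group order, so its public key is the point at infinity and both export calls must fail.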
*/ + { + unsigned char privkey[300]; + unsigned char seckey[32] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, + 0xba, 0xae, 0xdc, 0xe6, 0xaf, 0x48, 0xa0, 0x3b, + 0xbf, 0xd2, 0x5e, 0x8c, 0xd0, 0x36, 0x41, 0x41, + }; + int outlen = 300; + CHECK(!secp256k1_ec_privkey_export(ctx, seckey, privkey, &outlen, 0)); + CHECK(!secp256k1_ec_privkey_export(ctx, seckey, privkey, &outlen, 1)); + } +} + +void run_ecdsa_edge_cases(void) { + test_ecdsa_edge_cases(); +} + +#ifdef ENABLE_OPENSSL_TESTS +EC_KEY *get_openssl_key(const secp256k1_scalar_t *key) { + unsigned char privkey[300]; + int privkeylen; + const unsigned char* pbegin = privkey; + int compr = secp256k1_rand32() & 1; + EC_KEY *ec_key = EC_KEY_new_by_curve_name(NID_secp256k1); + CHECK(secp256k1_eckey_privkey_serialize(&ctx->ecmult_gen_ctx, privkey, &privkeylen, key, compr)); + CHECK(d2i_ECPrivateKey(&ec_key, &pbegin, privkeylen)); + CHECK(EC_KEY_check_key(ec_key)); + return ec_key; +} + +void test_ecdsa_openssl(void) { + secp256k1_gej_t qj; + secp256k1_ge_t q; + secp256k1_scalar_t sigr, sigs; + secp256k1_scalar_t one; + secp256k1_scalar_t msg2; + secp256k1_scalar_t key, msg; + EC_KEY *ec_key; + unsigned int sigsize = 80; + int secp_sigsize = 80; + unsigned char message[32]; + unsigned char signature[80]; + secp256k1_rand256_test(message); + secp256k1_scalar_set_b32(&msg, message, NULL); + random_scalar_order_test(&key); + secp256k1_ecmult_gen(&ctx->ecmult_gen_ctx, &qj, &key); + secp256k1_ge_set_gej(&q, &qj); + ec_key = get_openssl_key(&key); + CHECK(ec_key); + CHECK(ECDSA_sign(0, message, sizeof(message), signature, &sigsize, ec_key)); + CHECK(secp256k1_ecdsa_sig_parse(&sigr, &sigs, signature, sigsize)); + CHECK(secp256k1_ecdsa_sig_verify(&ctx->ecmult_ctx, &sigr, &sigs, &q, &msg)); + secp256k1_scalar_set_int(&one, 1); + secp256k1_scalar_add(&msg2, &msg, &one); + CHECK(!secp256k1_ecdsa_sig_verify(&ctx->ecmult_ctx, &sigr, &sigs, &q, &msg2)); + + random_sign(&sigr, &sigs, &key, &msg, NULL); + CHECK(secp256k1_ecdsa_sig_serialize(signature, &secp_sigsize, &sigr, &sigs)); + CHECK(ECDSA_verify(0, message, sizeof(message), signature, secp_sigsize, ec_key) == 1); + + EC_KEY_free(ec_key); +} + +void run_ecdsa_openssl(void) { + int i; + for (i = 0; i < 10*count; i++) { + test_ecdsa_openssl(); + } +} +#endif + +#ifdef ENABLE_MODULE_ECDH +# include "modules/ecdh/tests_impl.h" +#endif + +#ifdef ENABLE_MODULE_SCHNORR +# include "modules/schnorr/tests_impl.h" +#endif + +int main(int argc, char **argv) { + unsigned char seed16[16] = {0}; + unsigned char run32[32] = {0}; + /* find iteration count */ + if (argc > 1) { + count = strtol(argv[1], NULL, 0); + } + + /* find random seed */ + if (argc > 2) { + int pos = 0; + const char* ch = argv[2]; + while (pos < 16 && ch[0] != 0 && ch[1] != 0) { + unsigned short sh; + if (sscanf(ch, "%2hx", &sh)) { + seed16[pos] = sh; + } else { + break; + } + ch += 2; + pos++; + } + } else { + FILE *frand = fopen("/dev/urandom", "r"); + if (!frand || !fread(&seed16, sizeof(seed16), 1, frand)) { + uint64_t t = time(NULL) * (uint64_t)1337; + seed16[0] ^= t; + seed16[1] ^= t >> 8; + seed16[2] ^= t >> 16; + seed16[3] ^= t >> 24; + seed16[4] ^= t >> 32; + seed16[5] ^= t >> 40; + seed16[6] ^= t >> 48; + seed16[7] ^= t >> 56; + } + fclose(frand); + } + secp256k1_rand_seed(seed16); + + printf("test count = %i\n", count); + printf("random seed = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", seed16[0], seed16[1], seed16[2], seed16[3], seed16[4], seed16[5], 
seed16[6], seed16[7], seed16[8], seed16[9], seed16[10], seed16[11], seed16[12], seed16[13], seed16[14], seed16[15]); + + /* initialize */ + run_context_tests(); + ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); + + if (secp256k1_rand32() & 1) { + secp256k1_rand256(run32); + CHECK(secp256k1_context_randomize(ctx, secp256k1_rand32() & 1 ? run32 : NULL)); + } + + run_sha256_tests(); + run_hmac_sha256_tests(); + run_rfc6979_hmac_sha256_tests(); + +#ifndef USE_NUM_NONE + /* num tests */ + run_num_smalltests(); +#endif + + /* scalar tests */ + run_scalar_tests(); + + /* field tests */ + run_field_inv(); + run_field_inv_var(); + run_field_inv_all_var(); + run_field_misc(); + run_field_convert(); + run_sqr(); + run_sqrt(); + + /* group tests */ + run_ge(); + + /* ecmult tests */ + run_wnaf(); + run_point_times_order(); + run_ecmult_chain(); + run_ecmult_constants(); + run_ecmult_gen_blind(); + run_ecmult_const_tests(); + run_ec_combine(); + + /* endomorphism tests */ +#ifdef USE_ENDOMORPHISM + run_endomorphism_tests(); +#endif + +#ifdef ENABLE_MODULE_ECDH + /* ecdh tests */ + run_ecdh_tests(); +#endif + + /* ecdsa tests */ + run_random_pubkeys(); + run_ecdsa_sign_verify(); + run_ecdsa_end_to_end(); + run_ecdsa_edge_cases(); +#ifdef ENABLE_OPENSSL_TESTS + run_ecdsa_openssl(); +#endif + +#ifdef ENABLE_MODULE_SCHNORR + /* Schnorr tests */ + run_schnorr_tests(); +#endif + + secp256k1_rand256(run32); + printf("random run = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", run32[0], run32[1], run32[2], run32[3], run32[4], run32[5], run32[6], run32[7], run32[8], run32[9], run32[10], run32[11], run32[12], run32[13], run32[14], run32[15]); + + /* shutdown */ + secp256k1_context_destroy(ctx); + return 0; +} diff --git a/bf/secp256k1/src/util.h b/bf/secp256k1/src/util.h new file mode 100644 index 0000000..ce21c42 --- /dev/null +++ b/bf/secp256k1/src/util.h @@ -0,0 +1,106 @@ +/********************************************************************** + * Copyright (c) 2013, 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_UTIL_H_ +#define _SECP256K1_UTIL_H_ + +#if defined HAVE_CONFIG_H +#include "libsecp256k1-config.h" +#endif + +#include +#include +#include + +typedef struct { + void (*fn)(const char *text, void* data); + void* data; +} callback_t; + +#ifdef DETERMINISTIC +#define TEST_FAILURE(msg) do { \ + fprintf(stderr, "%s\n", msg); \ + abort(); \ +} while(0); +#else +#define TEST_FAILURE(msg) do { \ + fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, msg); \ + abort(); \ +} while(0) +#endif + +#ifdef HAVE_BUILTIN_EXPECT +#define EXPECT(x,c) __builtin_expect((x),(c)) +#else +#define EXPECT(x,c) (x) +#endif + +#ifdef DETERMINISTIC +#define CHECK(cond) do { \ + if (EXPECT(!(cond), 0)) { \ + TEST_FAILURE("test condition failed"); \ + } \ +} while(0) +#else +#define CHECK(cond) do { \ + if (EXPECT(!(cond), 0)) { \ + TEST_FAILURE("test condition failed: " #cond); \ + } \ +} while(0) +#endif + +/* Like assert(), but when VERIFY is defined, and side-effect safe. 
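+ * In non-VERIFY builds the condition is still evaluated, but its value is discarded via a (void) cast, so side effects behave identically.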
*/ +#ifdef VERIFY +#define VERIFY_CHECK CHECK +#define VERIFY_SETUP(stmt) do { stmt; } while(0) +#else +#define VERIFY_CHECK(cond) do { (void)(cond); } while(0) +#define VERIFY_SETUP(stmt) +#endif + +static SECP256K1_INLINE void *checked_malloc(const callback_t* cb, size_t size) { + void *ret = malloc(size); + if (ret == NULL) { + cb->fn("Out of memory", cb->data); + } + return ret; +} + +/* Macro for restrict, when available and not in a VERIFY build. */ +#if defined(SECP256K1_BUILD) && defined(VERIFY) +# define SECP256K1_RESTRICT +#else +# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) +# if SECP256K1_GNUC_PREREQ(3,0) +# define SECP256K1_RESTRICT __restrict__ +# elif (defined(_MSC_VER) && _MSC_VER >= 1400) +# define SECP256K1_RESTRICT __restrict +# else +# define SECP256K1_RESTRICT +# endif +# else +# define SECP256K1_RESTRICT restrict +# endif +#endif + +#if defined(_WIN32) +# define I64FORMAT "I64d" +# define I64uFORMAT "I64u" +#else +# define I64FORMAT "lld" +# define I64uFORMAT "llu" +#endif + +#if defined(HAVE___INT128) +# if defined(__GNUC__) +# define SECP256K1_GNUC_EXT __extension__ +# else +# define SECP256K1_GNUC_EXT +# endif +SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t; +#endif + +#endif diff --git a/cnc/pom.xml b/cnc/pom.xml new file mode 100644 index 0000000..9f76d6d --- /dev/null +++ b/cnc/pom.xml @@ -0,0 +1,83 @@ + + 4.0.0 + + org.btcollider + cnc + 0.0.1-SNAPSHOT + jar + + cnc + http://maven.apache.org + + + UTF-8 + 1.8 + 4.12 + 5.0.0 + ${junit.version}.0 + 5.0.0 + 1.0.0 + + + + + + maven-compiler-plugin + 3.1 + + ${java.version} + ${java.version} + + + + maven-surefire-plugin + 2.19 + + + org.junit.platform + junit-platform-surefire-provider + ${junit.platform.version} + + + + + + + + + + org.junit.jupiter + junit-jupiter-engine + ${junit.jupiter.version} + test + + + + junit + junit + ${junit.version} + test + + + org.junit.platform + junit-platform-runner + ${junit.platform.version} + test + + + org.junit.vintage + junit-vintage-engine + ${junit.vintage.version} + test + + + + + org.apache.logging.log4j + log4j-slf4j-impl + 2.11.0 + + + diff --git a/cnc/src/main/java/org/btcollider/cnc/CnC.java b/cnc/src/main/java/org/btcollider/cnc/CnC.java new file mode 100644 index 0000000..e86067d --- /dev/null +++ b/cnc/src/main/java/org/btcollider/cnc/CnC.java @@ -0,0 +1,20 @@ +package org.btcollider.cnc; + +import org.btcollider.cnc.comm.CommServer; +import org.btcollider.cnc.keysrv.KeyServer; + +/** + * Hello world! 
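+ * Command-and-control entry point: initializes the KeyServer with an index/depth configuration and starts a CommServer listening on PORT.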
+ * + */ +public class CnC { + public static final int PORT = 26765; + // Invariant: positive long (2^63-1) must be able to hold all bits handled here + public static final int MAX_BITS = 62; + + public static void main(String[] args) { + KeyServer.init(55, 20); + CommServer server = new CommServer(PORT); + server.listen(); + } +} diff --git a/cnc/src/main/java/org/btcollider/cnc/comm/ClientWorker.java b/cnc/src/main/java/org/btcollider/cnc/comm/ClientWorker.java new file mode 100644 index 0000000..8d9dad0 --- /dev/null +++ b/cnc/src/main/java/org/btcollider/cnc/comm/ClientWorker.java @@ -0,0 +1,72 @@ +package org.btcollider.cnc.comm; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.net.Socket; + +import org.btcollider.cnc.dto.KeyRange; +import org.btcollider.cnc.keysrv.KeyServer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ClientWorker implements Runnable { + final Logger log = LoggerFactory.getLogger(ClientWorker.class); + + private Socket clientSocket; + private BufferedReader in; + private PrintWriter out; + + public ClientWorker(Socket clientSocket) { + this.clientSocket = clientSocket; + } + + @Override + public void run() { + try { + in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream())); + out = new PrintWriter(new OutputStreamWriter(clientSocket.getOutputStream())); + + String cmd = in.readLine(); + + switch (cmd) { + case "WRK": + sendWork(); + break; + case "RES": + retrieveResult(); + break; + default: + assert false; + } + + } catch (IOException e) { + e.printStackTrace(); + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + if (out != null) { + out.flush(); + out.close(); + } + } + } + + private void sendWork() { + KeyServer ks = KeyServer.getInstance(); + KeyRange kr = ks.getRange(); + out.println(kr.getStart() + " " + kr.getEnd() + " " + kr.getTotal()); + out.flush(); + } + + private void retrieveResult() { + + } +} diff --git a/cnc/src/main/java/org/btcollider/cnc/comm/CommServer.java b/cnc/src/main/java/org/btcollider/cnc/comm/CommServer.java new file mode 100644 index 0000000..2339b83 --- /dev/null +++ b/cnc/src/main/java/org/btcollider/cnc/comm/CommServer.java @@ -0,0 +1,53 @@ +package org.btcollider.cnc.comm; + +import java.io.IOException; +import java.net.ServerSocket; +import java.net.Socket; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CommServer { + final Logger log = LoggerFactory.getLogger(CommServer.class); + + private ServerSocket serverSocket; + + public CommServer(int port) { + log.debug("Starting server on port {}", port); + + try { + this.serverSocket = new ServerSocket(); + } catch (IOException e) { + log.error("Couldn't allocate server socket", e); + + if(this.serverSocket != null && !this.serverSocket.isClosed()) { + try { + serverSocket.close(); + } catch (IOException e1) { + log.error("Server socket couldn't be closed cleanly", e1); + } + } + } + + } + + public void listen() { + System.out.println("Start listening..."); + + ExecutorService es = Executors.newCachedThreadPool(); + + while (true) { + try { + Socket socket = serverSocket.accept(); + System.out.println("Got connection from " + socket.getInetAddress()); + System.out.println("Starting ClientWorker"); + es.execute(new 
ClientWorker(socket)); + + } catch (IOException e) { + e.printStackTrace(); + } + } + } +} diff --git a/cnc/src/main/java/org/btcollider/cnc/dto/KeyRange.java b/cnc/src/main/java/org/btcollider/cnc/dto/KeyRange.java new file mode 100644 index 0000000..e6ba3b6 --- /dev/null +++ b/cnc/src/main/java/org/btcollider/cnc/dto/KeyRange.java @@ -0,0 +1,32 @@ +package org.btcollider.cnc.dto; + +import java.util.BitSet; + +import org.btcollider.cnc.CnC; + +public class KeyRange { + private BitSet start; + private BitSet end; + + public KeyRange(BitSet start, BitSet end) { + assert start.length() <= CnC.MAX_BITS && end.length() <= CnC.MAX_BITS; + + this.start = start; + this.end = end; + } + + public BitSet getStart() { + return start; + } + + public BitSet getEnd() { + return end; + } + + public Long getTotal() { + // asserted that <= MAX_BITS, first long contains all significant bits + Long total = getStart().toLongArray()[0] - getEnd().toLongArray()[0]; + + return total; + } +} diff --git a/cnc/src/main/java/org/btcollider/cnc/keysrv/KeyServer.java b/cnc/src/main/java/org/btcollider/cnc/keysrv/KeyServer.java new file mode 100644 index 0000000..e9c47e2 --- /dev/null +++ b/cnc/src/main/java/org/btcollider/cnc/keysrv/KeyServer.java @@ -0,0 +1,198 @@ +package org.btcollider.cnc.keysrv; + +import java.util.BitSet; + +import org.btcollider.cnc.CnC; +import org.btcollider.cnc.dto.KeyRange; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class KeyServer { + final Logger log = LoggerFactory.getLogger(KeyServer.class); + + private static KeyServer instance; + + private KeyTree root; + private int index; + private int depth; + + protected KeyServer(int index, int depth) { + assert Math.pow(2, CnC.MAX_BITS) <= Long.MAX_VALUE; + assert index < CnC.MAX_BITS && depth <= CnC.MAX_BITS; + + this.depth = depth; + this.index = index; + this.root = generateKeyTree(depth); + } + + public static void init(int index, int depth) { + instance = new KeyServer(index, depth); + } + + public static KeyServer getInstance() { + if (instance == null) + throw new IllegalAccessError("KeyServer is not initialized"); + + return instance; + } + + public KeyRange getRange() { + BitSet keyStart = new BitSet(CnC.MAX_BITS); + + recCollectKey(root, keyStart, index); + + BitSet keyEnd = new BitSet(CnC.MAX_BITS); + keyEnd.or(keyStart); // set keyEnd to keyStart + keyEnd.flip(0, index + 1 - depth); // reverse last bits + + KeyRange kr = new KeyRange(keyStart, keyEnd); + + return kr; + } + + public void setSearched(Long from, Long to) { + assert from >= 0 && to >= 0 && from <= to; + + long step = (long) Math.pow(2, ((index + 1) - depth)); + + for (long i = from; i < to; i += step) { + BitSet searchedKey = BitSet.valueOf(new long[] { i }); + markSearched(searchedKey); + } + } + + public void setSearched(KeyRange keyRange) { + markSearched(keyRange.getStart()); + } + + public void setInWork(KeyRange keyRange) { + markInWork(keyRange.getStart()); + } + + private void markInWork(BitSet key) { + KeyTree keyTreePointer = root; + + int curIdx = key.length() - 2; // ignore root bit + while (curIdx >= 0) { + if (key.get(curIdx)) + keyTreePointer = keyTreePointer.getRight(); + else + keyTreePointer = keyTreePointer.getLeft(); + + if (keyTreePointer.inWork()) { + break; // path already marked as inWork + } + + if (keyTreePointer.isLeaf()) { // end of tree reached + keyTreePointer.setInWork(); + recMarkInWorkParents(keyTreePointer); + break; + } + + curIdx--; + } + } + + private void recMarkInWorkParents(KeyTree child) { + if 
(child.getParent() == null) + return; + + KeyTree parent = child.getParent(); + + if (!parent.getLeft().inWork() || !parent.getRight().inWork()) { + return; + } + + // Mark parent searched too, check next level + parent.setInWork(Long.max(parent.getLeft().inWorkSince(), parent.getRight().inWorkSince())); + + recMarkSearchedParents(parent); + } + + private void markSearched(BitSet key) { + KeyTree keyTreePointer = root; + + int curIdx = key.length() - 2; // ignore root bit + while (curIdx >= 0) { + if (key.get(curIdx)) + keyTreePointer = keyTreePointer.getRight(); + else + keyTreePointer = keyTreePointer.getLeft(); + + if (keyTreePointer.isSearched()) + break; // path already marked as searched + + if (keyTreePointer.isLeaf()) { // end of tree reached + keyTreePointer.setSearched(true); + recMarkSearchedParents(keyTreePointer); + break; + } + + curIdx--; + } + } + + private void recMarkSearchedParents(KeyTree child) { + if (child.getParent() == null) + return; + + KeyTree parent = child.getParent(); + + if (!parent.getLeft().isSearched() || !parent.getRight().isSearched()) { + return; + } + + // Mark parent searched too, check next level + parent.setSearched(true); + recMarkSearchedParents(parent); + } + + private KeyTree generateKeyTree(int depth) { + assert depth > 0; + + KeyTree root = new KeyTree(true, null); + recGenKeyTree(root, depth - 1); + + return root; + } + + private void recGenKeyTree(KeyTree kt, int depth) { + assert depth >= 0; + + if (depth == 0) + return; + + KeyTree left = new KeyTree(false, kt); + KeyTree right = new KeyTree(true, kt); + + kt.setLeft(left); + kt.setRight(right); + + recGenKeyTree(left, depth - 1); + recGenKeyTree(right, depth - 1); + } + + private void recCollectKey(KeyTree kt, BitSet keyStart, int curIndex) { + assert curIndex > 0; + + keyStart.set(curIndex, kt.getValue()); + + KeyTree left = kt.getLeft(); + KeyTree right = kt.getRight(); + + // Finish criteria + if (left.isLeaf() && !left.isSearched()) { + keyStart.set(curIndex - 1, left.getValue()); + return; + } else if (right.isLeaf() && !right.isSearched()) { + keyStart.set(curIndex - 1, right.getValue()); + return; + } + + // Recursive depth-first traversal + if (!left.isSearched()) + recCollectKey(left, keyStart, curIndex - 1); + else + recCollectKey(right, keyStart, curIndex - 1); + } +} diff --git a/cnc/src/main/java/org/btcollider/cnc/keysrv/KeyTree.java b/cnc/src/main/java/org/btcollider/cnc/keysrv/KeyTree.java new file mode 100644 index 0000000..5b5d76e --- /dev/null +++ b/cnc/src/main/java/org/btcollider/cnc/keysrv/KeyTree.java @@ -0,0 +1,72 @@ +package org.btcollider.cnc.keysrv; + +public class KeyTree { + private boolean value; + private boolean searched; + + private long inWorkSince; + + private KeyTree parent; + + private KeyTree left; + private KeyTree right; + + public KeyTree(boolean value, KeyTree parent) { + this.value = value; + this.parent = parent; + this.searched = false; + this.inWorkSince = -1; + } + + public boolean isLeaf() { + return left == null && right == null; + } + + public boolean getValue() { + return value; + } + + public KeyTree getParent() { + return parent; + } + + public KeyTree getLeft() { + return left; + } + + public void setLeft(KeyTree left) { + this.left = left; + } + + public KeyTree getRight() { + return right; + } + + public void setRight(KeyTree right) { + this.right = right; + } + + public Boolean isSearched() { + return searched; + } + + public void setSearched(boolean searched) { + this.searched = searched; + } + + public void setInWork(long since) 
{ + this.inWorkSince = System.currentTimeMillis(); + } + + public void setInWork() { + this.inWorkSince = System.currentTimeMillis(); + } + + public boolean inWork() { + return inWorkSince != -1; + } + + public long inWorkSince() { + return this.inWorkSince; + } +} diff --git a/cnc/src/main/resources/log4j2.xml b/cnc/src/main/resources/log4j2.xml new file mode 100644 index 0000000..443a5b4 --- /dev/null +++ b/cnc/src/main/resources/log4j2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/cnc/src/test/java/org/btcollider/cnc/AppTest.java b/cnc/src/test/java/org/btcollider/cnc/AppTest.java new file mode 100644 index 0000000..f030ee1 --- /dev/null +++ b/cnc/src/test/java/org/btcollider/cnc/AppTest.java @@ -0,0 +1,38 @@ +package org.btcollider.cnc; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Unit test for simple App. + */ +public class AppTest + extends TestCase +{ + /** + * Create the test case + * + * @param testName name of the test case + */ + public AppTest( String testName ) + { + super( testName ); + } + + /** + * @return the suite of tests being tested + */ + public static Test suite() + { + return new TestSuite( AppTest.class ); + } + + /** + * Rigourous Test :-) + */ + public void testApp() + { + assertTrue( true ); + } +}