core/riot/reference/RiotEcc.c (1,245 lines of code) (raw):
/******************************************************************************
* Copyright (c) 2014, AllSeen Alliance. All rights reserved.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*****************************************************************************/
/*
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE in the project root.
*/
//
// 4-MAY-2015; RIoT adaptation (DennisMa;MSFT).
//
#include "stdbool.h"
#include "stdint.h"
#include "include/RiotDerDec.h"
#include "include/RiotDerEnc.h"
#include "include/RiotEcc.h"
#include "include/RiotKdf.h"
#include "include/RiotSha256.h"
#include "include/RiotStatus.h"
#include "riot/riot_core.h"
// P256 is tested directly with known answer tests from example in
// ANSI X9.62 Annex L.4.2. (See item in pt_mpy_testcases below.)
// Mathematica code, written in a non-curve-specific way, was also
// tested on the ANSI example, then used to generate both P192 and
// P256 test cases.
//
// This file exports the functions ECDH_generate, ECDH_derive, and
// optionally, ECDSA_sign and ECDSA_Ref_verify. It depends on a function
// get_random_bytes, which is expected to be of cryptographic quality.
//
//
// References:
//
// [KnuthV2] is D.E. Knuth, The Art of Computer Programming, Volume 2:
// Seminumerical Algorithms, 1969.
//
// [HMV] is D. Hankerson, A. Menezes, and S. Vanstone, Guide to
// Elliptic Curve Cryptography, 2004.
//
// [Wallace] is C.S. Wallace, "A suggestion for a Fast Multiplier",
// IEEE Transactions on Electronic Computers, EC-13 no. 1, pp 14-17,
// 1964.
//
// [ANSIX9.62] is ANSI X9.62-2005, "Public Key Cryptography for the Financial
// Services Industry The Elliptic Curve Digital Signature Algorithm
// (ECDSA)".
//
//
// The vast majority of cycles in programs like this are spent in
// modular multiplication. The usual approach is Montgomery
// multiplication, which effectively does two multiplications in place
// of one multiplication and one reduction. However, this program is
// dedicated to the NIST standard curves P256 and P192. Most of the
// NIST curves have the property that they can be expressed as a_i *
// 2^(32*i), where a_i is -1, 0, or +1. For example P192 is 2^(6*32)
// - 2^(2*32) - 2^(0*32). This allows easy word-oriented reduction
// (32 bit words): The word at position 6 can just be subtracted from
// word 6 (i.e. word 6 zeroed), and added to words 2 and 0. This is
// faster than Montgomery multiplication.
//
// Two problems with the naive implementation suggested above are carry
// propagation and getting the reduction precise.
//
// Every time you do an add or subtract you have to propagate carries.
// The result might come out between the modulus and 2^192 or 2^256,
// in which case you subtract the modulus. Most carry propagation is avoided
// by using 64 bit words during computation, even though the radix is only
// 2^32. A carry propagation is done once in the multiplication
// and once again after the reduction step. (This idea comes from the carry
// save adder used in hardware designs.)
//
// Exact reduction is required for only a few operations: comparisons,
// and halving. The multiplier for point multiplication must also be
// exactly reduced. So we do away with the requirement for exact
// reduction in most operations. Thus, any reduced value, X, can may
// represented by X + k * modulus, for any integer k, as long as the
// result is representable in the data structure. Typically k is
// between -1 and 1. (A bigval_t has one more 32 bit word than is
// required to hold the modulus, and is interpreted as 2's complement
// binary, little endian by word, native endian within words.)
//
// An exact reduction function is supplied, and must be called as necessary.
//
#define ASRT(_X) if(!(_X)) {goto Error;}
#define CHK(_X) if(((_X)) < 0) {goto Error;}
#if USES_EPHEMERAL
//
// The external function get_random_bytes is expected to be available.
// It must return 0 on success, and -1 on error. Feel free to rename
// this function, if necessary.
//
// static int get_random_bytes(uint8_t *buf, size_t len);
#endif
//
// CONFIGURATION STUFF
//
// All these values are undefined. It seems better to set the preprocessor
// variables in the makefile, and thus avoid generating many different versions
// of the code. This may not be practical with ECC_P192 and ECC_P256, but at
// least that is only in the RiotEcc.h file.
//
#if ECDSA_SIGN || ECDSA_VERIFY
#define ECDSA
#endif
// Define ARM7_ASM to use assembly code specially for the ARM7 processor
// #define ARM7_ASM
// Define SMALL_CODE to skip unrolling loops
// #define SMALL_CODE
// Define SPECIAL_SQUARE to generate a special case for squaring. Special
// squaring should just about halve the number of multiplies, but on Windows
// machines and if loops are unrolled (SMALL_CODE not defined) actually
// causes slight slowing.
#define SPECIAL_SQUARE
// Define MPY2BITS to consume the multiplier two bits at a time.
#define MPY2BITS
// Define ECC_TEST to rename the the exported symbols to avoid name collisions
// with OpenSSL and a few other things necessary for linking with the test
// program ecctest.c
// #define ECC_TEST
#ifdef ECC_TEST
#define ECDSA_sign TEST_ECDSA_sign
#define ECDSA_Ref_verify TEST_ECDSA_verify
#define COND_STATIC
#else
#define COND_STATIC static
#endif
typedef struct {
int64_t data[2 * BIGLEN];
} dblbigval_t;
// These values describe why the verify failed. This simplifies testing.
typedef enum {
V_SUCCESS = 0,
V_R_ZERO,
V_R_BIG,
V_S_ZERO,
V_S_BIG,
V_INFINITY,
V_UNEQUAL,
} verify_res_t;
typedef enum {
MOD_MODULUS = 0,
MOD_ORDER,
} modulus_val_t;
#define MSW (BIGLEN - 1)
static void big_adjustP (bigval_t *tgt, bigval_t const *a, int64_t k);
static void big_1wd_mpy (bigval_t *tgt, bigval_t const *a, int32_t k);
static void big_sub (bigval_t *tgt, bigval_t const *a, bigval_t const *b);
static void big_precise_reduce (bigval_t *tgt, bigval_t const *a, bigval_t const *modulus);
#define big_is_negative(a) ((int32_t)(a)->data[MSW] < 0)
// Does approximate reduction. Subtracts most significant word times modulus
// from src. The double cast is important to get sign extension right.
#define big_approx_reduceP(tgt, src) \
big_adjustP(tgt, src, -(int64_t)(int32_t)(src)->data[MSW])
// If tgt is a modular value, it must be precisely reduced.
#define big_is_odd(tgt) ((tgt)->data[0] & 1)
// Squares, always modulo the modulus.
#define big_sqrP(tgt, a) big_mpyP(tgt, a, a, MOD_MODULUS)
#define m1 0xffffffffU
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef MAX
# define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
#define OVERFLOWCHECK(sum, a, b) ((((a) > 0) && ((b) > 0) && ((sum) <= 0)) || \
(((a) < 0) && ((b) < 0) && ((sum) >= 0)))
// NOTE WELL! The Z component must always be precisely reduced.
typedef struct {
bigval_t X;
bigval_t Y;
bigval_t Z;
} jacobian_point_t;
static bigval_t const big_zero = {{0, 0, 0, 0, 0, 0, 0}};
static bigval_t const big_one = {{1, 0, 0, 0, 0, 0, 0}};
static affine_point_t const affine_infinity = {
{{0, 0, 0, 0, 0, 0, 0}},
{{0, 0, 0, 0, 0, 0, 0}},
true
};
static jacobian_point_t const jacobian_infinity = {
{{1, 0, 0, 0, 0, 0, 0}},
{{1, 0, 0, 0, 0, 0, 0}},
{{0, 0, 0, 0, 0, 0, 0}}
};
static bigval_t const modulusP256 = {{m1, m1, m1, 0, 0, 0, 1, m1, 0}};
static bigval_t const b_P256 = {
{
0x27d2604b, 0x3bce3c3e, 0xcc53b0f6, 0x651d06b0,
0x769886bc, 0xb3ebbd55, 0xaa3a93e7, 0x5ac635d8, 0x00000000
}
};
static bigval_t const orderP256 = {
{
0xfc632551, 0xf3b9cac2, 0xa7179e84, 0xbce6faad,
0xffffffff, 0xffffffff, 0x00000000, 0xffffffff,
0x00000000
}
};
#ifdef ECDSA
static dblbigval_t const orderDBL256 = {
{
0xfc632551LL - 0x100000000LL,
0xf3b9cac2LL - 0x100000000LL + 1LL,
0xa7179e84LL - 0x100000000LL + 1LL,
0xbce6faadLL - 0x100000000LL + 1LL,
0xffffffffLL - 0x100000000LL + 1LL,
0xffffffffLL - 0x100000000LL + 1LL,
0x00000000LL + 0x1LL,
0xffffffffLL - 0x100000000LL,
0x00000000LL + 1LL
}
};
#endif
static affine_point_t const baseP256 = {
{{
0xd898c296, 0xf4a13945, 0x2deb33a0, 0x77037d81,
0x63a440f2, 0xf8bce6e5, 0xe12c4247, 0x6b17d1f2
}},
{{
0x37bf51f5, 0xcbb64068, 0x6b315ece, 0x2bce3357,
0x7c0f9e16, 0x8ee7eb4a, 0xfe1a7f9b, 0x4fe342e2
}},
false
};
#define modulusP modulusP256
#define orderP orderP256
#define orderDBL orderDBL256
#define base_point baseP256
#define curve_b b_P256
#ifdef ARM7_ASM
//
// cum_carry: 32-bit word that accumulates carries
// sum0: lower half 32-bit word of sum
// sum1: higher half 32-bit word of sum
// a: 32-bit operand to be multiplied
// b: 32-bit operand to be multiplied
// tmpr0, tmpr1: two temporary words
// sum = sum + A*B where cout may contain carry info from previous operations
//
#define MULACC(a, b) \
__asm \
{ \
UMULL tmpr0, tmpr1, a, b; \
ADDS sum0, sum0, tmpr0; \
ADCS sum1, sum1, tmpr1; \
ADC cum_carry, cum_carry, 0x0; \
}
#define MULACC_DOUBLE(a, b) \
__asm \
{ \
UMULL tmpr0, tmpr1, a, b; \
ADDS sum0, sum0, tmpr0; \
ADCS sum1, sum1, tmpr1; \
ADC cum_carry, cum_carry, 0x0; \
ADDS sum0, sum0, tmpr0; \
ADCS sum1, sum1, tmpr1; \
ADC cum_carry, cum_carry, 0x0; \
}
#define ACCUM(ap, bp) MULACC(*(ap), *(bp))
#define ACCUMDBL(ap, bp) MULACC_DOUBLE(*(ap), *(bp))
#else // ARM7_ASM, below is platform independent
//
// (sum, carry) += a * b
//
static void mpy_accum (int *cumcarry, uint64_t *sum, uint32_t a, uint32_t b)
{
uint64_t product = (uint64_t) a * (uint64_t) b;
uint64_t lsum = *sum;
lsum += product;
if (lsum < product) {
*cumcarry += 1;
}
*sum = lsum;
}
#ifdef SPECIAL_SQUARE
// (sum, carry) += 2 * a * b.
// Attempts to reduce writes and branches caused slowdown on windows machines.
static void mpy_accum_dbl (int *cumcarry, uint64_t *sum, uint32_t a, uint32_t b)
{
uint64_t product = (uint64_t) a * (uint64_t) b;
uint64_t lsum = *sum;
lsum += product;
if (lsum < product) {
*cumcarry += 1;
}
lsum += product;
if (lsum < product) {
*cumcarry += 1;
}
*sum = lsum;
}
#endif
// ap and bp are pointers to the words to be multiplied and accumulated.
#define ACCUM(ap, bp) mpy_accum(&cum_carry, &u_accum, *(ap), *(bp))
#define ACCUMDBL(ap, bp) mpy_accum_dbl(&cum_carry, &u_accum, *(ap), *(bp))
#endif
//
// The big_mpyP algorithm first multiplies the two arguments, with the
// outer loop indexing over output words, and the inner "loop"
// (unrolled unless SMALL_CODE is defined), collecting all the terms
// that contribute to that output word.
//
// The implementation is inspired by the Wallace Tree often used in
// hardware [Wallace], where (0, 1) terms of the same weight are
// collected together into a sequence values each of which can be on
// the order of the number of bits in a word, and then the sequence is
// turned into a binary number with a carry save adder. This is
// generalized from base 2 to base 2^32.
//
// The first part of the algorithm sums together products of equal
// weight. The outer loop does carry propagation and makes each value
// at most 32 bits.
//
// Then corrections are applied for negative arguments. (The first
// part essentially does unsigned multiplication.)
//
// The reduction proceeds in 2 steps. The first treats the 32 bit
// values (in 64 bit words) from above as though they were
// polynomials, and reduces by the paper and pencil method. Carries
// are propagated and the result collapsed to a sequence of 32 bit
// words (in the target). The second step subtracts MSW * modulus
// from the result. This usually (but not always) results in the MSW
// being zero. (And that makes subsequent multiplications faster.)
//
// The modselect parameter chooses whether reduction is mod the modulus
// or the order of the curve. If ECDSA is not defined, this parameter
// is ignored, and the curve modulus is used.
//
//
// Computes a * b, approximately reduced mod modulusP or orderP,
// depending on the modselect flag.
//
static void big_mpyP (bigval_t *tgt, bigval_t const *a, bigval_t const *b, modulus_val_t modselect)
{
int64_t w[2 * BIGLEN];
int64_t s_accum; // signed
int i, minj, maxj, a_words, b_words, cum_carry;
#ifdef SMALL_CODE
int j;
#else
uint32_t const *ap;
uint32_t const *bp;
#endif
#ifdef ARM7_ASM
uint32_t tmpr0, tmpr1, sum0, sum1;
#else
uint64_t u_accum;
#endif
#ifdef ECDSA
#define MODSELECT modselect
#else
#define MODSELECT MOD_MODULUS
#endif
a_words = BIGLEN;
while (a_words > 0 && a->data[a_words - 1] == 0) {
--a_words;
}
//
// i is target index. The j (in comments only) indexes
// through the multiplier.
//
#ifdef ARM7_ASM
sum0 = 0;
sum1 = 0;
cum_carry = 0;
#else
u_accum = 0;
cum_carry = 0;
#endif
#ifndef SPECIAL_SQUARE
#define NO_SPECIAL_SQUARE 1
#else
#define NO_SPECIAL_SQUARE 0
#endif
if (NO_SPECIAL_SQUARE || (a != b)) {
// normal multiply
// compute length of b
b_words = BIGLEN;
while (b_words > 0 && b->data[b_words - 1] == 0) {
--b_words;
}
// iterate over words of output
for (i = 0; i < a_words + b_words - 1; ++i) {
//
// Run j over all possible values such that
// 0 <= j < b_words && 0 <= i-j < a_words.
// Hence
// j >= 0 and j > i - a_words and
// j < b_words and j <= i
//
// (j exists only in the mind of the reader.)
//
maxj = MIN (b_words - 1, i);
minj = MAX (0, i - a_words + 1);
// ACCUM accumulates into <cum_carry, u_accum>.
#ifdef SMALL_CODE
for (j = minj; j <= maxj; ++j) {
ACCUM (a->data + i - j, b->data + j);
}
#else // SMALL_CODE not defined
//
// The inner loop (over j, running from minj to maxj) is
// unrolled. Sequentially increasing case values in the code
// are intended to coax the compiler into emitting a jump
// table. Here j runs from maxj to minj, but addition is
// commutative, so it doesn't matter.
//
ap = &a->data[i - minj];
bp = &b->data[minj];
// the order is opposite the loop, but addition is commutative
switch (8 - (maxj - minj)) {
case 0:
ACCUM (ap - 8, bp + 8); // j = 8
/* fall through */ /* no break */
case 1:
ACCUM (ap - 7, bp + 7);
/* fall through */ /* no break */
case 2:
ACCUM (ap - 6, bp + 6);
/* fall through */ /* no break */
case 3:
ACCUM (ap - 5, bp + 5);
/* fall through */ /* no break */
case 4:
ACCUM (ap - 4, bp + 4);
/* fall through */ /* no break */
case 5:
ACCUM (ap - 3, bp + 3);
/* fall through */ /* no break */
case 6:
ACCUM (ap - 2, bp + 2);
/* fall through */ /* no break */
case 7:
ACCUM (ap - 1, bp + 1);
/* fall through */ /* no break */
case 8:
ACCUM (ap - 0, bp + 0); // j = 0
/* fall through */ /* no break */
}
#endif // SMALL_CODE not defined
// The total value is
// w + u_accum << (32 *i) + cum_carry << (32 * i + 64).
// The steps from here to the end of the i-loop (not counting
// squaring branch) and the increment of i by the loop
// maintain the invariant that the value is constant.
// (Assume w had been initialized to zero, even though we
// really didn't.)
#ifdef ARM7_ASM
w[i] = sum0;
sum0 = sum1;
sum1 = cum_carry;
cum_carry = 0;
#else
w[i] = u_accum & 0xffffffffULL;
u_accum = (u_accum >> 32) + ((uint64_t) cum_carry << 32);
cum_carry = 0;
#endif
}
}
else {
// squaring
#ifdef SPECIAL_SQUARE
// a[i] * a[j] + a[j] * a[i] == 2 * (a[i] * a[j]), so
// we can cut the number of multiplies nearly in half.
for (i = 0; i < 2 * a_words - 1; ++i) {
// Run j over all possible values such that
// 0 <= j < a_words && 0 <= i-j < a_words && j < i-j
// Hence
// j >= 0 and j > i - a_words and
// j < a_words and 2*j < i
//
maxj = MIN (a_words - 1, i);
// Only go half way. Must use (i-1)>> 1, not (i-1)/ 2
maxj = MIN (maxj, (i - 1) >> 1);
minj = MAX (0, i - a_words + 1);
#ifdef SMALL_CODE
for (j = minj; j <= maxj; ++j) {
ACCUMDBL (a->data + i - j, a->data + j);
}
// j live
if ((i & 1) == 0) {
ACCUM (a->data + j, a->data + j);
}
#else // SMALL_CODE not defined
ap = &a->data[i - minj];
bp = &a->data[minj];
switch (8 - (maxj - minj)) {
case 0:
ACCUMDBL (ap - 8, bp + 8); // j = 8
/* fall through */ /* no break */
case 1:
ACCUMDBL (ap - 7, bp + 7);
/* fall through */ /* no break */
case 2:
ACCUMDBL (ap - 6, bp + 6);
/* fall through */ /* no break */
case 3:
ACCUMDBL (ap - 5, bp + 5);
/* fall through */ /* no break */
case 4:
ACCUMDBL (ap - 4, bp + 4);
/* fall through */ /* no break */
case 5:
ACCUMDBL (ap - 3, bp + 3);
/* fall through */ /* no break */
case 6:
ACCUMDBL (ap - 2, bp + 2);
/* fall through */ /* no break */
case 7:
ACCUMDBL (ap - 1, bp + 1);
/* fall through */ /* no break */
case 8:
ACCUMDBL (ap - 0, bp + 0); // j = 0
/* fall through */ /* no break */
}
// Even numbered columns (zero based) have a middle element.
if ((i & 1) == 0) {
ACCUM (a->data + maxj + 1, a->data + maxj + 1);
}
#endif // SMALL_CODE not defined
// The total value is
// w + u_accum << (32 *i) + cum_carry << (32 * i + 64).
// The steps from here to the end of i-loop and
// the increment of i by the loop maintain the invariant
// that the total value is unchanged.
// (Assume w had been initialized to zero, even though we
// really didn't.)
#ifdef ARM7_ASM
w[i] = sum0;
sum0 = sum1;
sum1 = cum_carry;
cum_carry = 0;
#else // ARM7_ASM not defined
w[i] = u_accum & 0xffffffffULL;
u_accum = (u_accum >> 32) + ((uint64_t) cum_carry << 32);
cum_carry = 0;
#endif // ARM7_ASM not defined
}
#endif // SPECIAL_SQUARE
} // false branch of NO_SPECIAL_SQUARE || (a != b)
// The total value as indicated above is maintained invariant
// down to the approximate reduction code below.
// propagate any residual to next to end of array
for (; i < 2 * BIGLEN - 1; ++i) {
#ifdef ARM7_ASM
w[i] = sum0;
sum0 = sum1;
sum1 = 0;
#else
w[i] = u_accum & 0xffffffffULL;
u_accum >>= 32;
#endif
}
// i is still live
// from here on, think of w as containing signed values
// Last value of the array, still using i. We store the entire 64
// bits. There are two reasons for this. The pedantic one is that
// this clearly maintains our invariant that the value has not
// changed. The other one is that this makes w[BIGNUM-1] negative
// if the result was negative, and reduction depends on this.
#ifdef ARM7_ASM
w[i] = ((uint64_t) sum1 << 32) | sum0;
// sum1 = sum0 = 0; maintain invariant
#else
w[i] = u_accum;
// u_accum = 0; maintain invariant
#endif
//
// Apply correction if a or b are negative. It would be nice to
// put this inside the i-loop to reduce memory bandwidth. Later...
//
// signvedval(a) = unsignedval(a) - 2^(32*BIGLEN)*isneg(a).
//
// so signval(a) * signedval(b) = unsignedval(a) * unsignedval[b] -
// isneg(a) * unsignedval(b) * 2^(32*BIGLEN) -
// isneg(b) * unsingedval(a) * 2^ (32*BIGLEN) +
// isneg(a) * isneg(b) * 2 ^(2 * 32 * BIGLEN)
//
// If one arg is zero and the other is negative, obviously no
// correction is needed, but we do not make a special case, since
// the "correction" only adds in zero.
if (big_is_negative (a)) {
for (i = 0; i < BIGLEN; ++i) {
w[i + BIGLEN] -= b->data[i];
}
}
if (big_is_negative (b)) {
for (i = 0; i < BIGLEN; ++i) {
w[i + BIGLEN] -= a->data[i];
}
if (big_is_negative (a)) {
// both negative
w[2 * BIGLEN - 1] += 1ULL << 32;
}
}
//
// The code from here to the end of the function maintains w mod
// modulusP constant, even though it changes the value of w.
//
// reduce (approximate)
if (MODSELECT == MOD_MODULUS) {
for (i = 2 * BIGLEN - 1; i >= MSW; --i) {
int64_t v;
v = w[i];
if (v != 0) {
w[i] = 0;
w[i - 1] += v;
w[i - 2] -= v;
w[i - 5] -= v;
w[i - 8] += v;
}
}
}
else {
// modulo order. Not performance critical
#if ECDSA_SIGN || ECDSA_VERIFY
int64_t carry;
// convert to 32 bit values, except for most signifiant word
carry = 0;
for (i = 0; i < 2 * BIGLEN - 1; ++i) {
w[i] += carry;
carry = w[i] >> 32;
w[i] -= carry << 32;
}
// i is live
w[i] += carry;
// each iteration knocks off word i
for (i = 2 * BIGLEN - 1; i >= MSW; --i) { // most to least significant
int64_t v;
int64_t tmp;
int64_t tmp2;
int j;
int k;
for (k = 0; w[i] != 0 && k < 3; ++k) {
v = w[i];
carry = 0;
for (j = i - MSW; j < 2 * BIGLEN; ++j) {
if (j <= i) {
tmp2 = -(v * orderDBL.data[j - i + MSW]);
tmp = w[j] + tmp2 + carry;
}
else {
tmp = w[j] + carry;
}
if (j < 2 * BIGLEN - 1) {
carry = tmp >> 32;
tmp -= carry << 32;
}
else {
carry = 0;
}
w[j] = tmp;
}
}
}
#endif // ECDSA_SIGN || ECDSA_VERIFY
}
// propagate carries and copy out to tgt in 32 bit chunks.
s_accum = 0;
for (i = 0; i < BIGLEN; ++i) {
s_accum += w[i];
tgt->data[i] = (uint32_t) s_accum;
s_accum >>= 32; // signed, so sign bit propagates
}
// final approximate reduction
if (MODSELECT == MOD_MODULUS) {
big_approx_reduceP (tgt, tgt);
}
else {
#ifdef ECDSA
if (tgt->data[MSW]) {
// Keep it simple! At one time all this was done in place,
// and was totally non-obvious.
bigval_t tmp;
// The most significant word is signed, even though the
// whole array has declared uint32_t.
big_1wd_mpy (&tmp, &orderP, (int32_t) tgt->data[MSW]);
big_sub (tgt, tgt, &tmp);
}
#endif // ECDSA
}
}
//
// Adds k * modulusP to a and stores into target. -2^62 <= k <= 2^62 .
// (This is conservative.)
static void big_adjustP (bigval_t *tgt, bigval_t const *a, int64_t k)
{
#define RDCSTEP(i, adj) \
w += a->data[i]; \
w += (adj); \
tgt->data[i] = (uint32_t)(int32_t)w; \
w >>= 32;
// add k * modulus
if (k != 0) {
int64_t w = 0;
RDCSTEP (0, -k);
RDCSTEP (1, 0);
RDCSTEP (2, 0);
RDCSTEP (3, k);
RDCSTEP (4, 0);
RDCSTEP (5, 0);
RDCSTEP (6, k);
RDCSTEP (7, -k);
RDCSTEP (8, k);
}
else if (tgt != a) {
*tgt = *a;
}
}
//
// Computes k * a and stores into target. Conditions:
// product must be representable in bigval_t.
static void big_1wd_mpy (bigval_t *tgt, bigval_t const *a, int32_t k)
{
int64_t w = 0;
int64_t tmp;
int64_t prod;
int j;
for (j = 0; j <= MSW; ++j) {
prod = (int64_t) k * (int64_t) a->data[j];
tmp = w + prod;
w = tmp;
tgt->data[j] = (uint32_t) w;
w -= tgt->data[j];
w >>= 32;
}
}
//
// Adds a to b as signed (2's complement) numbers. Ok to use for
// modular values if you don't let the sum overflow.
COND_STATIC void big_add (bigval_t *tgt, bigval_t const *a, bigval_t const *b)
{
uint64_t v;
int i;
v = 0;
for (i = 0; i < BIGLEN; ++i) {
v += a->data[i];
v += b->data[i];
tgt->data[i] = (uint32_t) v;
v >>= 32;
}
}
//
// modulo modulusP addition with approximate reduction.
static void big_addP (bigval_t *tgt, bigval_t const *a, bigval_t const *b)
{
big_add (tgt, a, b);
big_approx_reduceP (tgt, tgt);
}
// 2's complement subtraction
static void big_sub (bigval_t *tgt, bigval_t const *a, bigval_t const *b)
{
uint64_t v;
int i;
// negation is equivalent to 1's complement and increment
v = 1; // increment
for (i = 0; i < BIGLEN; ++i) {
v += a->data[i];
v += ~b->data[i]; // 1's complement
tgt->data[i] = (uint32_t) v;
v >>= 32;
}
}
//
//modulo modulusP subtraction with approximate reduction.
static void big_subP (bigval_t *tgt, bigval_t const *a, bigval_t const *b)
{
big_sub (tgt, a, b);
big_approx_reduceP (tgt, tgt);
}
//
// returns 1 if a > b, -1 if a < b, and 0 if a == b.
// a and b are 2's complement. When applied to modular values,
// args must be precisely reduced.
static int big_cmp (bigval_t const *a, bigval_t const *b)
{
int i;
// most significant word is treated as 2's complement
if ((int32_t) a->data[MSW] > (int32_t) b->data[MSW]) {
return (1);
}
else if ((int32_t) a->data[MSW] < (int32_t) b->data[MSW]) {
return (-1);
}
// remainder treated as unsigned
for (i = MSW - 1; i >= 0; --i) {
if (a->data[i] > b->data[i]) {
return (1);
}
else if (a->data[i] < b->data[i]) {
return (-1);
}
}
return (0);
}
//
// Computes tgt = a mod modulus. Only works with moduli slightly
// less than 2**(32*(BIGLEN-1)). Both modulusP and orderP qualify.
static void big_precise_reduce (bigval_t *tgt, bigval_t const *a, bigval_t const *modulus)
{
//
// src is a trick to avoid an extra copy of a to arg a to a
// temporary. Every statement uses src as the src and tgt as the
// destination, and it executes src = tgt, so all subsequent
// operations affect the modified data, not the original. There is
// a case to handle the situation of no modifications having been
// made.
//
bigval_t const *src = a;
// If tgt < 0, a positive value gets added in, so eventually tgt
// will be >= 0. If tgt > 0 and the MSW is non-zero, a non-zero
// value smaller than tgt gets subtracted, so eventually target
// becomes < 1 * 2**(32*MSW), but not negative, i.e. tgt->data[MSW]
// == 0, and thus loop termination is guaranteed.
while ((int32_t) src->data[MSW] != 0) {
if (modulus != &modulusP) {
// General case. Keep it simple!
bigval_t tmp;
// The most significant word is signed, even though the
// whole array has been declared uint32_t.
big_1wd_mpy (&tmp, modulus, (int32_t) src->data[MSW]);
big_sub (tgt, src, &tmp);
}
else {
// just an optimization. The other branch would work, but slower.
big_adjustP (tgt, src, -(int64_t) (int32_t) src->data[MSW]);
}
src = tgt;
}
while (big_cmp (src, modulus) >= 0) {
big_sub (tgt, src, modulus);
src = tgt;
}
while ((int32_t) src->data[MSW] < 0) {
big_add (tgt, src, modulus);
src = tgt;
}
// copy src to tgt if not already done
if (src != tgt) {
*tgt = *src;
}
}
// computes floor(a / 2), 2's complement.
static void big_halve (bigval_t *tgt, bigval_t const *a)
{
uint32_t shiftval;
uint32_t new_shiftval;
int i;
// most significant word is 2's complement. Do it separately.
shiftval = a->data[MSW] & 1;
tgt->data[MSW] = (uint32_t) ((int32_t) a->data[MSW] >> 1);
for (i = MSW - 1; i >= 0; --i) {
new_shiftval = a->data[i] & 1;
tgt->data[i] = (a->data[i] >> 1) | (shiftval << 31);
shiftval = new_shiftval;
}
}
//
// computes tgt, such that 2 * tgt === a, (mod modulusP). NOTE WELL:
// arg a must be precisely reduced. This function could do that, but
// in some cases, arg a is known to already be reduced and we don't
// want to waste cycles. The code could be written more cleverly to
// avoid passing over the data twice in the case of an odd value.
//
static void big_halveP (bigval_t *tgt, bigval_t const *a)
{
if (a->data[0] & 1) {
// odd
big_adjustP (tgt, a, 1);
big_halve (tgt, tgt);
}
else {
// even
big_halve (tgt, a);
}
}
// returns true if a is zero
static bool big_is_zero (bigval_t const *a)
{
int i;
for (i = 0; i < BIGLEN; ++i) {
if (a->data[i] != 0) {
return (false);
}
}
return (true);
}
// returns true if a is one
static bool big_is_one (bigval_t const *a)
{
int i;
if (a->data[0] != 1) {
return (false);
}
for (i = 1; i < BIGLEN; ++i) {
if (a->data[i] != 0) {
return (false);
}
}
return (true);
}
//
// This uses the extended binary GCD (Greatest Common Divisor)
// algorithm. The binary GCD algorithm is presented in [KnuthV2] as
// Algorithm X. The extension to do division is presented in Homework
// Problem 15 and its solution in the back of the book.
//
// The implementation here follows the presentation in [HMV] Algorithm
// 2.22.
//
// If the denominator is zero, it will loop forever. Be careful!
// Modulus must be odd. num and den must be positive.
static void big_divide (bigval_t *tgt, bigval_t const *num, bigval_t const *den,
bigval_t const *modulus)
{
bigval_t u, v, x1, x2;
u = *den;
v = *modulus;
x1 = *num;
x2 = big_zero;
while (!big_is_one (&u) && !big_is_one (&v)) {
while (!big_is_odd (&u)) {
big_halve (&u, &u);
if (big_is_odd (&x1)) {
big_add (&x1, &x1, modulus);
}
big_halve (&x1, &x1);
}
while (!big_is_odd (&v)) {
big_halve (&v, &v);
if (big_is_odd (&x2)) {
big_add (&x2, &x2, modulus);
}
big_halve (&x2, &x2);
}
if (big_cmp (&u, &v) >= 0) {
big_sub (&u, &u, &v);
big_sub (&x1, &x1, &x2);
}
else {
big_sub (&v, &v, &u);
big_sub (&x2, &x2, &x1);
}
}
if (big_is_one (&u)) {
big_precise_reduce (tgt, &x1, modulus);
}
else {
big_precise_reduce (tgt, &x2, modulus);
}
}
static void big_triple (bigval_t *tgt, bigval_t const *a)
{
int i;
uint64_t accum = 0;
// technically, the lower significance words should be treated as
// unsigned and the most significant word treated as signed
// (arithmetic right shift instead of logical right shift), but
// accum can never get negative during processing the lower
// significance words, and the most significant word is the last
// word processed, so what is left in the accum after the final
// shift does not matter.
for (i = 0; i < BIGLEN; ++i) {
accum += a->data[i];
accum += a->data[i];
accum += a->data[i];
tgt->data[i] = (uint32_t) accum;
accum >>= 32;
}
}
//
// The point add and point double algorithms use mixed Jacobian
// and affine coordinates. The affine point (x,y) corresponds
// to the Jacobian point (X, Y, Z), for any non-zero Z, with X = Z^2 * x
// and Y = Z^3 * y. The infinite point is represented in Jacobian
// coordinates as (1, 1, 0).
#define jacobian_point_is_infinity(P) (big_is_zero(&(P)->Z))
static void toJacobian (jacobian_point_t *tgt, affine_point_t const *a)
{
tgt->X = a->x;
tgt->Y = a->y;
tgt->Z = big_one;
}
// a->Z must be precisely reduced
static void toAffine (affine_point_t *tgt, jacobian_point_t const *a)
{
bigval_t zinv, zinvpwr;
if (big_is_zero (&a->Z)) {
*tgt = affine_infinity;
return;
}
big_divide (&zinv, &big_one, &a->Z, &modulusP);
big_sqrP (&zinvpwr, &zinv); // Zinv^2
big_mpyP (&tgt->x, &a->X, &zinvpwr, MOD_MODULUS);
big_mpyP (&zinvpwr, &zinvpwr, &zinv, MOD_MODULUS); // Zinv^3
big_mpyP (&tgt->y, &a->Y, &zinvpwr, MOD_MODULUS);
big_precise_reduce (&tgt->x, &tgt->x, &modulusP);
big_precise_reduce (&tgt->y, &tgt->y, &modulusP);
tgt->infinity = false;
}
//
// From [HMV] Algorithm 3.21.
// tgt = 2 * P. P->Z must be precisely reduced and
// tgt->Z will be precisely reduced
static void pointDouble (jacobian_point_t *tgt, jacobian_point_t const *P)
{
bigval_t x3loc, y3loc, z3loc, t1, t2, t3;
#define x1 (&P->X)
#define y1 (&P->Y)
#define z1 (&P->Z)
#define x3 (&x3loc)
#define y3 (&y3loc)
#define z3 (&z3loc)
// This requires P->Z be precisely reduced
if (jacobian_point_is_infinity (P)) {
*tgt = jacobian_infinity;
return;
}
big_sqrP (&t1, z1);
big_subP (&t2, x1, &t1);
big_addP (&t1, x1, &t1);
big_mpyP (&t2, &t2, &t1, MOD_MODULUS);
big_triple (&t2, &t2);
big_addP (y3, y1, y1);
big_mpyP (z3, y3, z1, MOD_MODULUS);
big_sqrP (y3, y3);
big_mpyP (&t3, y3, x1, MOD_MODULUS);
big_sqrP (y3, y3);
big_halveP (y3, y3);
big_sqrP (x3, &t2);
big_addP (&t1, &t3, &t3);
// x1 not used after this point. Safe to store to tgt, even if aliased
big_subP (&tgt->X, x3, &t1);
#undef x3
#define x3 (&tgt->X)
big_subP (&t1, &t3, x3);
big_mpyP (&t1, &t1, &t2, MOD_MODULUS);
big_subP (&tgt->Y, &t1, y3);
// Z components of returned Jacobian points must
// be precisely reduced
big_precise_reduce (&tgt->Z, z3, &modulusP);
#undef x1
#undef y1
#undef z1
#undef x3
#undef y3
#undef z3
}
//
// From [HMV] Algorithm 3.22
// tgt = P + Q. P->Z must be precisely reduced.
// tgt->Z will be precisely reduced. tgt and P can be aliased.
static void pointAdd (jacobian_point_t *tgt, jacobian_point_t const *P, affine_point_t const *Q)
{
bigval_t t1, t2, t3, t4, x3loc;
if (Q->infinity) {
if (tgt != P) {
*tgt = *P;
}
return;
}
// This requires that P->Z be precisely reduced
if (jacobian_point_is_infinity (P)) {
toJacobian (tgt, Q);
return;
}
#define x1 (&P->X)
#define y1 (&P->Y)
#define z1 (&P->Z)
#define x2 (&Q->x)
#define y2 (&Q->y)
#define x3 (&x3loc)
#define y3 (&y3loc)
#define z3 (&tgt->Z)
big_sqrP (&t1, z1);
big_mpyP (&t2, &t1, z1, MOD_MODULUS);
big_mpyP (&t1, &t1, x2, MOD_MODULUS);
big_mpyP (&t2, &t2, y2, MOD_MODULUS);
big_subP (&t1, &t1, x1);
big_subP (&t2, &t2, y1);
// big_is_zero requires precisely reduced arg
big_precise_reduce (&t1, &t1, &modulusP);
if (big_is_zero (&t1)) {
big_precise_reduce (&t2, &t2, &modulusP);
if (big_is_zero (&t2)) {
toJacobian (tgt, Q);
pointDouble (tgt, tgt);
}
else {
*tgt = jacobian_infinity;
}
return;
}
// store into target. okay, even if tgt is aliased with P,
// as z1 is not subsequently used
big_mpyP (z3, z1, &t1, MOD_MODULUS);
// z coordinates of returned jacobians must be precisely reduced.
big_precise_reduce (z3, z3, &modulusP);
big_sqrP (&t3, &t1);
big_mpyP (&t4, &t3, &t1, MOD_MODULUS);
big_mpyP (&t3, &t3, x1, MOD_MODULUS);
big_addP (&t1, &t3, &t3);
big_sqrP (x3, &t2);
big_subP (x3, x3, &t1);
big_subP (&tgt->X, x3, &t4);
// switch x3 to tgt
#undef x3
#define x3 (&tgt->X)
big_subP (&t3, &t3, x3);
big_mpyP (&t3, &t3, &t2, MOD_MODULUS);
big_mpyP (&t4, &t4, y1, MOD_MODULUS);
// switch y3 to tgt
#undef y3
#define y3 (&tgt->Y)
big_subP (y3, &t3, &t4);
#undef x1
#undef y1
#undef z1
#undef x2
#undef y2
#undef x3
#undef y3
#undef z3
}
// pointMpyP uses a left-to-right binary double-and-add method, which
// is an exact analogy to the left-to-right binary method for
// exponentiation described in [KnuthV2] Section 4.6.3.
// returns bit i of bignum n. LSB of n is bit 0.
#define big_get_bit(n, i) (((n)->data[(i) / 32] >> ((i) % 32)) & 1)
// returns bits i+1 and i of bignum n. LSB of n is bit 0; i <= 30
#define big_get_2bits(n, i) (((n)->data[(i) / 32] >> ((i) % 32)) & 3)
// k must be non-negative. Negative values (incorrectly)
// return the infinite point
static void pointMpyP (affine_point_t *tgt, bigval_t const *k, affine_point_t const *P)
{
int i;
jacobian_point_t Q;
#ifdef MPY2BITS
affine_point_t const *mpyset[4];
affine_point_t twoP, threeP;
#endif // MPY2BITS
if (big_is_negative (k)) {
// This should never happen.
*tgt = affine_infinity;
return;
}
Q = jacobian_infinity;
// faster
if (big_is_zero (k) || big_is_negative (k)) {
*tgt = affine_infinity;
return;
}
#ifndef MPY2BITS
// Classical high-to-low method
// discard high order zeros
for (i = BIGLEN * 32 - 1; i >= 0; --i) {
if (big_get_bit (k, i)) {
break;
}
}
// Can't fall through since k is non-zero. We get here only via the break
// discard highest order 1 bit
--i;
toJacobian (&Q, P);
for (; i >= 0; --i) {
pointDouble (&Q, &Q);
if (big_get_bit (k, i)) {
pointAdd (&Q, &Q, P);
}
}
#else // MPY2BITS defined
// multiply 2 bits at a time
// pre-compute 1P, 2P, and 3P
mpyset[0] = (affine_point_t*) 0;
mpyset[1] = P;
toJacobian (&Q, P); // Q = P
pointDouble (&Q, &Q); // now Q = 2P
toAffine (&twoP, &Q);
mpyset[2] = &twoP;
pointAdd (&Q, &Q, P); // now Q = 3P
toAffine (&threeP, &Q);
mpyset[3] = &threeP;
// discard high order zeros (in pairs)
for (i = BIGLEN * 32 - 2; i >= 0; i -= 2) {
if (big_get_2bits (k, i)) {
break;
}
}
Q = jacobian_infinity;
for (; i >= 0; i -= 2) {
int mbits = big_get_2bits (k, i);
pointDouble (&Q, &Q);
pointDouble (&Q, &Q);
if (mpyset[mbits] != (affine_point_t*) 0) {
pointAdd (&Q, &Q, mpyset[mbits]);
}
}
#endif // MPY2BITS
toAffine (tgt, &Q);
}
COND_STATIC bool on_curveP (affine_point_t const *P)
{
bigval_t sum, product;
if (P->infinity) {
return (true);
}
big_sqrP (&product, &P->x);
big_mpyP (&sum, &product, &P->x, MOD_MODULUS); // x^3
big_triple (&product, &P->x); // 3 x
big_subP (&sum, &sum, &product); // x^3 -3x
big_addP (&sum, &sum, &curve_b); // x^3 -3x + b
big_sqrP (&product, &P->y); // y^2
big_subP (&sum, &sum, &product); // -y^2 + x^3 -3x + b
big_precise_reduce (&sum, &sum, &modulusP);
return (big_is_zero (&sum));
}
#if USES_EPHEMERAL
// returns a bigval between 0 or 1 (depending on allow_zero)
// and order-1, inclusive. Returns 0 on success, -1 otherwise
COND_STATIC int big_get_random_n (bigval_t *tgt, bool allow_zero, const struct rng_engine *rng)
{
int rv;
tgt->data[BIGLEN - 1] = 0;
do {
rv = rng->generate_random_buffer (rng, sizeof (uint32_t) * (BIGLEN - 1), (uint8_t*) tgt);
if (rv != 0) {
return (-1);
}
} while ((!allow_zero && big_is_zero (tgt)) ||
(big_cmp (tgt, &orderP) >= 0));
return (0);
}
//
// computes a secret value, k, and a point, P1, to send to the other
// party. Returns 0 on success, -1 on failure (of the RNG).
int ECDH_generate (affine_point_t *P1, bigval_t *k, const struct rng_engine *rng)
{
int rv;
rv = big_get_random_n (k, false, rng);
if (rv < 0) {
return (-1);
}
pointMpyP (P1, k, &base_point);
return (0);
}
#endif
//
//Derives a secret value, k, and a point, P1, from the value of src.
RIOT_STATUS ECDH_derive (affine_point_t *P1, bigval_t *k, const uint8_t *src, size_t src_len)
{
if (src_len > RIOT_ECC_PRIVATE_BYTES) {
return RIOT_FAILURE;
}
BigIntToBigVal (k, src, src_len);
if (RIOT_DSA_check_privkey (k) != RIOT_SUCCESS) {
return RIOT_FAILURE;
}
pointMpyP (P1, k, &base_point);
if (P1->infinity) {
return RIOT_FAILURE;
}
return RIOT_SUCCESS;
}
// takes the point sent by the other party, and verifies that it is a
// valid point. If 1 <= k < orderP and the point is valid, it stores
// the resulting point *tgt and returns true. If the point is invalid it
// returns false. The behavior with k out of range is unspecified,
// but safe.
COND_STATIC bool ECDH_derive_pt (affine_point_t *tgt, bigval_t const *k, affine_point_t const *Q)
{
if (Q->infinity) {
return (false);
}
if (big_is_negative (&Q->x)) {
return (false);
}
if (big_cmp (&Q->x, &modulusP) >= 0) {
return (false);
}
if (big_is_negative (&Q->y)) {
return (false);
}
if (big_cmp (&Q->y, &modulusP) >= 0) {
return (false);
}
if (!on_curveP (Q)) {
return (false);
}
// [HMV] Section 4.3 states that the above steps, combined with the
// fact the h=1 for the curves used here, implies that order*Q =
// Infinity, which is required by ANSI X9.63.
pointMpyP (tgt, k, Q);
// Q2 can't be infinity if 1 <= k < orderP, which is supposed to be
// the case, but the test is so cheap, we just do it.
if (tgt->infinity) {
return (false);
}
return (true);
}
#if ECDSA_SIGN
//
// This function sets the r and s fields of sig. The implementation
// follows HMV Algorithm 4.29.
static int ECDSA_sign (bigval_t const *msgdgst, bigval_t const *privkey,
const struct rng_engine *rng, ECDSA_sig_t *sig)
{
int rv;
affine_point_t P1;
bigval_t k;
bigval_t t;
startpoint:
rv = ECDH_generate (&P1, &k, rng);
if (rv) {
return (rv);
}
big_precise_reduce (&sig->r, &P1.x, &orderP);
if (big_is_zero (&sig->r)) {
goto startpoint;
}
big_mpyP (&t, privkey, &sig->r, MOD_ORDER);
big_add (&t, &t, msgdgst);
big_precise_reduce (&t, &t, &orderP); // may not be necessary
big_divide (&sig->s, &t, &k, &orderP);
if (big_is_zero (&sig->s)) {
goto startpoint;
}
riot_core_clear (&k, sizeof (bigval_t));
return (0);
}
#endif // ECDSA_SIGN
#if ECDSA_VERIFY
//
// Returns true if the signature is valid.
// The implementation follow HMV Algorithm 4.30.
static verify_res_t ECDSA_verify_inner (bigval_t const *msgdgst, affine_point_t const *pubkey,
ECDSA_sig_t const *sig)
{
// We could reuse variables and save stack space. If stack space
// is tight, u1 and u2 could be the same variable by interleaving
// the big multiplies and the point multiplies. P2 and X could be
// the same variable. X.x could be reduced in place, eliminating
// v. And if you really wanted to get tricky, I think one could use
// unions between the affine and Jacobian versions of points. But
// check that out before doing it.
bigval_t v;
bigval_t w;
bigval_t u1;
bigval_t u2;
affine_point_t P1;
affine_point_t P2;
affine_point_t X;
jacobian_point_t P2Jacobian;
jacobian_point_t XJacobian;
if (big_cmp (&sig->r, &big_one) < 0) {
return (V_R_ZERO);
}
if (big_cmp (&sig->r, &orderP) >= 0) {
return (V_R_BIG);
}
if (big_cmp (&sig->s, &big_one) < 0) {
return (V_S_ZERO);
}
if (big_cmp (&sig->s, &orderP) >= 0) {
return (V_S_BIG);
}
big_divide (&w, &big_one, &sig->s, &orderP);
big_mpyP (&u1, msgdgst, &w, MOD_ORDER);
big_precise_reduce (&u1, &u1, &orderP);
big_mpyP (&u2, &sig->r, &w, MOD_ORDER);
big_precise_reduce (&u2, &u2, &orderP);
pointMpyP (&P1, &u1, &base_point);
pointMpyP (&P2, &u2, pubkey);
toJacobian (&P2Jacobian, &P2);
pointAdd (&XJacobian, &P2Jacobian, &P1);
toAffine (&X, &XJacobian);
if (X.infinity) {
return (V_INFINITY);
}
big_precise_reduce (&v, &X.x, &orderP);
if (big_cmp (&v, &sig->r) != 0) {
return (V_UNEQUAL);
}
return (V_SUCCESS);
}
bool ECDSA_Ref_verify (bigval_t const *msgdgst, affine_point_t const *pubkey,
ECDSA_sig_t const *sig)
{
if (ECDSA_verify_inner (msgdgst, pubkey, sig) == V_SUCCESS) {
return true;
}
return false;
}
#endif // ECDSA_VERIFY
// Convert a number from big endian by uint8_t to bigval_t. If the
// size of the input number is larger than the initialization size
// of a bigval_t ((BIGLEN - 1) * 4), it will be quietly truncated.
//
// @param out pointer to the bigval_t to be produced
// @param in pointer to the big-endian value to convert
// @param inSize number of bytes in the big-endian value
//
void BigIntToBigVal (bigval_t *tgt, void const *in, size_t inSize)
{
unsigned int i;
// The "4"s in the rest of this function are the number of bytes in
// a uint32_t (what bigval_t's are made of). The "8" is the number
// of bits in a uint8_t.
// reduce inSize to modulus size, if necessary
inSize = MIN (inSize, ((BIGLEN - 1) * 4));
*tgt = big_zero;
// move one uint8_t at a time starting with least significant uint8_t
for (i = 0; i < inSize; ++i) {
tgt->data[i / 4] |=
((uint8_t*) in)[inSize - 1 - i] << (8 * (i % 4));
}
}
//
// Convert a number from bigval_t to big endian by uint8_t.
// The conversion will stop after the first (BIGLEN - 1) words have been converted.
// The output size must be (BIGLEN - 1) * 4 bytes long.
//
// @param out pointer to the big endian value to be produced
// @param in pointer to the bigval_t to convert
//
void BigValToBigInt (void *out, const bigval_t *src)
{
int i;
// Start with the most significant word and work down.
// Initialize i with the number of bytes to move - 1.
uint8_t unused;
uint8_t *intermediate = (uint8_t*) out;
(void) unused; // Avoid compiler warnings.
for (i = ((BIGLEN - 1) * 4) - 1; i >= 0; i--) {
*intermediate = (uint8_t) (src->data[i / 4] >> (8 * (i % 4)));
unused = *(intermediate)++;
}
}
#ifdef ECC_TEST
char* ECC_feature_list (void)
{
return ("ECC_P256"
#if ECDSA_SIGN
" ECDSA_SIGN"
#endif
#if ECDSA_VERIFY
" ECDSA_VERIFY"
#endif
#ifdef SPECIAL_SQUARE
" SPECIAL_SQUARE"
#endif
#ifdef SMALL_CODE
" SMALL_CODE"
#endif
#ifdef MPY2BITS
" MPY2BITS"
#endif
#ifdef ARM7_ASM
" ARM7_ASM"
#endif
);
}
#endif // ECC_TEST
#if USES_EPHEMERAL
#include <stdlib.h>
//
// Seeds the DRBG and zeroizes the seed value.
//
void set_drbg_seed (uint8_t *buf, size_t length)
{
size_t i;
unsigned int drbg_seed;
if (buf) {
drbg_seed = 0;
for (i = 0; i < length; i++) {
drbg_seed += ~(buf[i]);
}
srand (~drbg_seed);
riot_core_clear (&drbg_seed, sizeof (unsigned int));
}
}
#endif
#if ECDH_OUT
//
// Generates the Ephemeral Diffie-Hellman key pair.
//
// @param publicKey The output public key
// @param privateKey The output private key
// @param rng The random number generator engine
//
// @return - RIOT_SUCCESS if the key pair is successfully generated.
// - RIOT_FAILURE otherwise
//
RIOT_STATUS RIOT_GenerateDHKeyPair (ecc_publickey *publicKey, ecc_privatekey *privateKey,
const struct rng_engine *rng)
{
if (ECDH_generate (publicKey, privateKey, rng) == 0) {
return RIOT_SUCCESS;
}
return RIOT_FAILURE;
}
#endif
//
// Generates the Diffie-Hellman share secret.
//
// @param peerPublicKey The peer's public key
// @param privateKey The private key
// @param secret The output share secret
//
// @return - RIOT_SUCCESS if the share secret is successfully generated.
// - RIOT_FAILURE otherwise
//
RIOT_STATUS RIOT_GenerateShareSecret (ecc_publickey *peerPublicKey, ecc_privatekey *privateKey,
ecc_secret *secret)
{
bool derive_rv;
derive_rv = ECDH_derive_pt (secret, privateKey, peerPublicKey);
if (!derive_rv) {
return RIOT_FAILURE; // bad
}
else {
if (!on_curveP (secret)) {
return RIOT_FAILURE; // bad
}
}
return RIOT_SUCCESS;
}
#if ECDSA_SIGN
//
// Generates the DSA key pair.
//
// @param publicKey The output public key
// @param privateKey The output private key
// @param rng The random number generator engine
// @return - RIOT_SUCCESS if the key pair is successfully generated
// - RIOT_FAILURE otherwise
//
RIOT_STATUS RIOT_GenerateDSAKeyPair (ecc_publickey *publicKey, ecc_privatekey *privateKey,
const struct rng_engine *rng)
{
if (ECDH_generate (publicKey, privateKey, rng) == 0) {
return RIOT_SUCCESS;
}
return RIOT_FAILURE;
}
//
// Derives a DSA key pair from the supplied value and label
//
// @param publicKey OUT: public key
// @param privateKey OUT: output private key
// @param srcVal IN: Source value for derivation
// @param srcSize IN: Source size. Should not exceed RIOT_ECC_PRIVATE_bytes.
// @return - RIOT_SUCCESS if the keypair is successfully derived
// - RIOT_FAILURE otherwise
//
RIOT_STATUS RIOT_DeriveDsaKeyPair (ecc_publickey *publicKey, ecc_privatekey *privateKey,
const uint8_t *srcVal, size_t srcSize)
{
return ECDH_derive (publicKey, privateKey, srcVal, srcSize);
}
//
// Sign a digest using the DSA key
//
RIOT_STATUS RIOT_DSASignDigest (const uint8_t *digest, size_t digest_size,
const ecc_privatekey *signingPrivateKey, uint8_t *buf, size_t buf_len,
const struct rng_engine *rng, int *out_len)
{
bigval_t source;
ecc_signature sig;
int status;
*out_len = 0;
BigIntToBigVal (&source, digest, digest_size);
status = ECDSA_sign (&source, signingPrivateKey, rng, &sig);
if (status != 0) {
return RIOT_FAILURE;
}
return RIOT_DSA_encode_signature (&sig, buf, buf_len, out_len);
}
//
// Sign a buffer using the DSA key
// @param buf The buffer to sign
// @param len The buffer len
// @param signingPrivateKey The signing private key
// @param rng The random number generator engine
// @param hash The hash engine
// @param sig The output signature
// @return - RIOT_SUCCESS if the signing process succeeds
// - RIOT_FAILURE otherwise
RIOT_STATUS RIOT_DSASign (const uint8_t *buf, uint16_t len, const ecc_privatekey *signingPrivateKey,
const struct rng_engine *rng, const struct hash_engine *hash, ecc_signature *sig)
{
uint8_t digest[SHA256_DIGEST_LENGTH];
size_t max_sig_len = RIOT_ECC_PRIVATE_BYTES * 4;
uint8_t der_sig[max_sig_len];
int sig_len;
int status;
status = hash->calculate_sha256 (hash, buf, len, digest, sizeof (digest));
if (status != 0) {
return RIOT_FAILURE;
}
status = RIOT_DSASignDigest (digest, SHA256_DIGEST_LENGTH, signingPrivateKey, der_sig,
max_sig_len, rng, &sig_len);
if (status != 0) {
return RIOT_FAILURE;
}
return RIOT_DSA_decode_signature (sig, der_sig, sig_len);
}
#endif
#if ECDSA_VERIFY
//
// Verify DSA signature of a digest
// @param digest The digest to sign
// @param digest_size The size of the digest buffer
// @param sig The signature
// @param pubKey The signing public key
// @return - RIOT_SUCCESS if the signature verification succeeds
// - RIOT_FAILURE otherwise
RIOT_STATUS RIOT_DSAVerifyDigest (const uint8_t *digest, size_t digest_size,
const ecc_signature *sig, const ecc_publickey *pubKey)
{
bigval_t source;
BigIntToBigVal (&source, digest, digest_size);
if (ECDSA_Ref_verify (&source, pubKey, sig) == true) {
return RIOT_SUCCESS;
}
return RIOT_FAILURE;
}
//
// Verify DSA signature of a buffer
// @param buf The buffer to sign
// @param len The buffer len
// @param sig The signature
// @param pubKey The signing public key
// @param hash The hash engine
// @return - RIOT_SUCCESS if the signature verification succeeds
// - RIOT_FAILURE otherwise
RIOT_STATUS RIOT_DSAVerify (const uint8_t *buf, uint16_t len, const ecc_signature *sig,
const ecc_publickey *pubKey, const struct hash_engine *hash)
{
uint8_t digest[SHA256_DIGEST_LENGTH];
int status;
status = hash->calculate_sha256 (hash, buf, len, digest, sizeof (digest));
if (status != 0) {
return RIOT_FAILURE;
}
return RIOT_DSAVerifyDigest (digest, SHA256_DIGEST_LENGTH, sig, pubKey);
}
//
// Checks if the private key integer is a valid value
//
RIOT_STATUS RIOT_DSA_check_privkey (const ecc_privatekey *priv_key)
{
if (big_is_zero (priv_key) || (big_cmp (priv_key,
&orderP) >= 0) || big_is_negative (priv_key)) {
return RIOT_FAILURE;
}
return RIOT_SUCCESS;
}
//
// Checks if the public key is a valid value
//
RIOT_STATUS RIOT_DSA_check_pubkey (const ecc_keypair *key)
{
if (key->Q.infinity || !(big_is_zero (&key->d))) {
return RIOT_FAILURE;
}
return RIOT_SUCCESS;
}
//
// Encodes a signature in ASN.1 DER format
//
RIOT_STATUS RIOT_DSA_encode_signature (const ecc_signature *sig, uint8_t *buf, size_t buf_len,
int *out_len)
{
DERBuilderContext derCtx;
uint8_t encBuffer[RIOT_ECC_SIG_BYTES];
DERInitContext (&derCtx, buf, buf_len);
CHK (DERStartSequenceOrSet (&derCtx, true));
BigValToBigInt (encBuffer, &sig->r);
CHK (DERAddIntegerFromArray (&derCtx, encBuffer, RIOT_ECC_SIG_BYTES));
BigValToBigInt (encBuffer, &sig->s);
CHK (DERAddIntegerFromArray (&derCtx, encBuffer, RIOT_ECC_SIG_BYTES));
CHK (DERPopNesting (&derCtx));
ASRT (DERGetNestingDepth (&derCtx) == 0);
*out_len = DERGetEncodedLength (&derCtx);
ASRT (*out_len != 0);
return RIOT_SUCCESS;
Error:
return RIOT_FAILURE;
}
//
// Decodes an ASN.1 DER encoded R/S ECC signature component
// @param out The decoded R/S integer
// @param der_buf The buffer that stores the DER encoded R/S integer
// @param der_len The length of the buffer storing the DER encoding
// @param position The current buffer position
// @return 0 if decoding of the encoded integer succeeds
// -1 otherwise
//
static int decode_rs (bigval_t *out, const uint8_t *der_buf, size_t der_len, size_t *position)
{
size_t len;
if (*position >= der_len) {
return -1;
}
if (der_buf[*position] != 0x02) {
return -1;
}
if ((*position + 1) >= der_len) {
return -1;
}
len = der_buf[*position + 1];
if (len > (RIOT_ECC_SIG_BYTES + 1)) {
return -1;
}
(*position) += 2; //consume integer header
if (*position >= der_len) {
return -1;
}
//ignore leading zero for negative integers
if (der_buf[*position] == 0) {
(*position)++;
len -= 1;
}
if ((*position + len) > der_len) {
return -1;
}
BigIntToBigVal (out, &der_buf[*position], len);
(*position) += len;
return 0;
}
//
// Decodes an ASN.1 DER encoded signature
//
RIOT_STATUS RIOT_DSA_decode_signature (ecc_signature *rs_sig, const uint8_t *der_sig,
size_t sig_len)
{
size_t position = 0;
ASRT (DERDECReadSequence (NULL, der_sig, sig_len, &position) == RIOT_SUCCESS);
CHK (decode_rs (&rs_sig->r, der_sig, sig_len, &position));
CHK (decode_rs (&rs_sig->s, der_sig, sig_len, &position));
return RIOT_SUCCESS;
Error:
return RIOT_FAILURE;
}
//
// Computes the size in bytes of the private key
//
int RIOT_DSA_size (const ecc_keypair *key)
{
if ((key == NULL) || big_is_zero (&key->d)) {
return 0;
}
return (sizeof (orderP) - sizeof (orderP.data[0]));
}
//
// Initializes an ECC key pair using the private and public DER encoded keys
//
RIOT_STATUS RIOT_DSA_init_key_pair (ecc_keypair *private_key, ecc_keypair *public_key,
const uint8_t *der_priv_key, size_t priv_key_len, const uint8_t *der_pub_key,
size_t pub_key_len)
{
size_t pub_key_coord_bytes = (pub_key_len - 2) / 2;
if (private_key) {
ASRT (priv_key_len <= RIOT_ECC_PRIVATE_BYTES);
BigIntToBigVal (&private_key->d, der_priv_key, priv_key_len);
ASRT (pub_key_coord_bytes <= RIOT_ECC_COORD_BYTES);
BigIntToBigVal (&private_key->Q.x, &der_pub_key[2], pub_key_coord_bytes);
BigIntToBigVal (&private_key->Q.y, &der_pub_key[pub_key_coord_bytes + 2],
pub_key_coord_bytes);
private_key->Q.infinity = false;
}
if (public_key) {
ASRT (pub_key_coord_bytes <= RIOT_ECC_COORD_BYTES);
BigIntToBigVal (&public_key->Q.x, &der_pub_key[2], pub_key_coord_bytes);
BigIntToBigVal (&public_key->Q.y, &der_pub_key[pub_key_coord_bytes + 2],
pub_key_coord_bytes);
public_key->Q.infinity = false;
}
return RIOT_SUCCESS;
Error:
return RIOT_FAILURE;
}
#endif