// Copyright (c) 2020, Google Inc.
// SPDX-License-Identifier: ISC
|
|
|
|
// An implementation of NIST P-256 elliptic curve point multiplication.
// Field elements are 256 bits wide and kept in Montgomery form, with separate
// 64-bit and 32-bit limb representations. Field operations are generated by
// Fiat, which lives in //third_party/fiat.
|
|
|
|
#include <openssl/base.h>
|
|
|
|
#include <openssl/bn.h>
|
|
#include <openssl/ec.h>
|
|
#include <openssl/err.h>
|
|
#include <openssl/mem.h>
|
|
#include <openssl/type_check.h>
|
|
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
|
|
#include "../../internal.h"
|
|
#include "../delocate.h"
|
|
#include "./internal.h"
|
|
#include "ec_nistp.h"
|
|
|
|
#if defined(BORINGSSL_HAS_UINT128)
|
|
#define BORINGSSL_NISTP256_64BIT 1
|
|
#include "../../../third_party/fiat/p256_64.h"
|
|
#else
|
|
#include "../../../third_party/fiat/p256_32.h"
|
|
#endif
|
|
|
|
|
|
// utility functions, handwritten
|
|
|
|
// Limb layout of a P-256 field element: four 64-bit limbs when the 64-bit
// Fiat backend is selected, otherwise eight 32-bit limbs. Either way a field
// element occupies 32 bytes.
#if defined(BORINGSSL_NISTP256_64BIT)
#define FIAT_P256_NLIMBS 4
typedef uint64_t fiat_p256_limb_t;
#else  // 64BIT; else 32BIT
#define FIAT_P256_NLIMBS 8
typedef uint32_t fiat_p256_limb_t;
#endif  // 64BIT

typedef fiat_p256_limb_t fiat_p256_felem[FIAT_P256_NLIMBS];

// fiat_p256_one is the constant exposed as |felem_one| in the P-256 method
// table, spelled out limb by limb for each representation.
#if defined(BORINGSSL_NISTP256_64BIT)
static const fiat_p256_felem fiat_p256_one = {
    0x1, 0xffffffff00000000, 0xffffffffffffffff, 0xfffffffe};
#else  // 64BIT; else 32BIT
static const fiat_p256_felem fiat_p256_one = {
    0x1, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0x0};
#endif  // 64BIT
|
|
|
|
|
|
static fiat_p256_limb_t fiat_p256_nz(
|
|
const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
|
|
fiat_p256_limb_t ret;
|
|
fiat_p256_nonzero(&ret, in1);
|
|
return ret;
|
|
}
|
|
|
|
static void fiat_p256_from_words(fiat_p256_felem out,
|
|
const BN_ULONG in[32 / sizeof(BN_ULONG)]) {
|
|
// Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on
|
|
// 64-bit platforms without |uint128_t|, they are different. However, on
|
|
// little-endian systems, |uint64_t[4]| and |uint32_t[8]| have the same
|
|
// layout.
|
|
OPENSSL_memcpy(out, in, 32);
|
|
}
|
|
|
|
// fiat_p256_from_generic loads the 32 bytes of |in->words| into |out|. The
// copy is a plain byte copy; the layout argument is given in
// |fiat_p256_from_words|.
static void fiat_p256_from_generic(fiat_p256_felem out, const EC_FELEM *in) {
  OPENSSL_memcpy(out, in->words, 32);
}
|
|
|
|
// fiat_p256_to_generic stores the 32 bytes of |in| into |out->words|. This is
// the inverse of the copy in |fiat_p256_from_words| and relies on the same
// layout argument.
static void fiat_p256_to_generic(EC_FELEM *out, const fiat_p256_felem in) {
  void *dst = out->words;
  OPENSSL_memcpy(dst, in, 32);
}
|
|
|
|
// fiat_p256_inv_square calculates |out| = |in|^{-2}
|
|
//
|
|
// Based on Fermat's Little Theorem:
|
|
// a^p = a (mod p)
|
|
// a^{p-1} = 1 (mod p)
|
|
// a^{p-3} = a^{-2} (mod p)
|
|
static void fiat_p256_inv_square(fiat_p256_felem out,
|
|
const fiat_p256_felem in) {
|
|
// This implements the addition chain described in
|
|
// https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion
|
|
fiat_p256_felem x2, x3, x6, x12, x15, x30, x32;
|
|
fiat_p256_square(x2, in); // 2^2 - 2^1
|
|
fiat_p256_mul(x2, x2, in); // 2^2 - 2^0
|
|
|
|
fiat_p256_square(x3, x2); // 2^3 - 2^1
|
|
fiat_p256_mul(x3, x3, in); // 2^3 - 2^0
|
|
|
|
fiat_p256_square(x6, x3);
|
|
for (int i = 1; i < 3; i++) {
|
|
fiat_p256_square(x6, x6);
|
|
} // 2^6 - 2^3
|
|
fiat_p256_mul(x6, x6, x3); // 2^6 - 2^0
|
|
|
|
fiat_p256_square(x12, x6);
|
|
for (int i = 1; i < 6; i++) {
|
|
fiat_p256_square(x12, x12);
|
|
} // 2^12 - 2^6
|
|
fiat_p256_mul(x12, x12, x6); // 2^12 - 2^0
|
|
|
|
fiat_p256_square(x15, x12);
|
|
for (int i = 1; i < 3; i++) {
|
|
fiat_p256_square(x15, x15);
|
|
} // 2^15 - 2^3
|
|
fiat_p256_mul(x15, x15, x3); // 2^15 - 2^0
|
|
|
|
fiat_p256_square(x30, x15);
|
|
for (int i = 1; i < 15; i++) {
|
|
fiat_p256_square(x30, x30);
|
|
} // 2^30 - 2^15
|
|
fiat_p256_mul(x30, x30, x15); // 2^30 - 2^0
|
|
|
|
fiat_p256_square(x32, x30);
|
|
fiat_p256_square(x32, x32); // 2^32 - 2^2
|
|
fiat_p256_mul(x32, x32, x2); // 2^32 - 2^0
|
|
|
|
fiat_p256_felem ret;
|
|
fiat_p256_square(ret, x32);
|
|
for (int i = 1; i < 31 + 1; i++) {
|
|
fiat_p256_square(ret, ret);
|
|
} // 2^64 - 2^32
|
|
fiat_p256_mul(ret, ret, in); // 2^64 - 2^32 + 2^0
|
|
|
|
for (int i = 0; i < 96 + 32; i++) {
|
|
fiat_p256_square(ret, ret);
|
|
} // 2^192 - 2^160 + 2^128
|
|
fiat_p256_mul(ret, ret, x32); // 2^192 - 2^160 + 2^128 + 2^32 - 2^0
|
|
|
|
for (int i = 0; i < 32; i++) {
|
|
fiat_p256_square(ret, ret);
|
|
} // 2^224 - 2^192 + 2^160 + 2^64 - 2^32
|
|
fiat_p256_mul(ret, ret, x32); // 2^224 - 2^192 + 2^160 + 2^64 - 2^0
|
|
|
|
for (int i = 0; i < 30; i++) {
|
|
fiat_p256_square(ret, ret);
|
|
} // 2^254 - 2^222 + 2^190 + 2^94 - 2^30
|
|
fiat_p256_mul(ret, ret, x30); // 2^254 - 2^222 + 2^190 + 2^94 - 2^0
|
|
|
|
fiat_p256_square(ret, ret);
|
|
fiat_p256_square(out, ret); // 2^256 - 2^224 + 2^192 + 2^96 - 2^2
|
|
}
|
|
|
|
// fiat_p256_point_double forwards to |ec_nistp_point_double| with the P-256
// method table. It reads the point (|x_in|, |y_in|, |z_in|) and writes the
// result to (|x_out|, |y_out|, |z_out|). |ec_GFp_nistp256_dbl| in this file
// calls it with the same buffers for input and output.
static void fiat_p256_point_double(
    fiat_p256_felem x_out, fiat_p256_felem y_out, fiat_p256_felem z_out,
    const fiat_p256_felem x_in, const fiat_p256_felem y_in,
    const fiat_p256_felem z_in) {
  ec_nistp_point_double(p256_methods(),
                        x_out, y_out, z_out,
                        x_in, y_in, z_in);
}
|
|
|
|
// fiat_p256_point_add forwards to |ec_nistp_point_add| with the P-256 method
// table. It reads the points (|x1|, |y1|, |z1|) and (|x2|, |y2|, |z2|) and
// writes the result to (|x3|, |y3|, |z3|). |mixed| is passed through
// unchanged; |ec_GFp_nistp256_add| in this file passes zero when both inputs
// are Jacobian.
static void fiat_p256_point_add(
    fiat_p256_felem x3, fiat_p256_felem y3, fiat_p256_felem z3,
    const fiat_p256_felem x1, const fiat_p256_felem y1,
    const fiat_p256_felem z1, const int mixed,
    const fiat_p256_felem x2, const fiat_p256_felem y2,
    const fiat_p256_felem z2) {
  ec_nistp_point_add(p256_methods(),
                     x3, y3, z3,
                     x1, y1, z1,
                     mixed,
                     x2, y2, z2);
}
|
|
|
|
#include "./p256_table.h"
|
|
|
|
DEFINE_METHOD_FUNCTION(ec_nistp_meth, p256_methods) {
|
|
out->felem_num_limbs = FIAT_P256_NLIMBS;
|
|
out->felem_num_bits = 256;
|
|
out->felem_add = fiat_p256_add;
|
|
out->felem_sub = fiat_p256_sub;
|
|
out->felem_mul = fiat_p256_mul;
|
|
out->felem_sqr = fiat_p256_square;
|
|
out->felem_neg = fiat_p256_opp;
|
|
out->felem_nz = fiat_p256_nz;
|
|
out->felem_one = fiat_p256_one;
|
|
out->point_dbl = fiat_p256_point_double;
|
|
out->point_add = fiat_p256_point_add;
|
|
out->scalar_mul_base_table = (const ec_nistp_felem_limb*) fiat_p256_g_pre_comp;
|
|
}
|
|
|
|
// OPENSSL EC_METHOD FUNCTIONS
|
|
|
|
// Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
|
|
// (X/Z^2, Y/Z^3).
|
|
static int ec_GFp_nistp256_point_get_affine_coordinates(
|
|
const EC_GROUP *group, const EC_JACOBIAN *point, EC_FELEM *x_out,
|
|
EC_FELEM *y_out) {
|
|
if (constant_time_declassify_int(
|
|
ec_GFp_simple_is_at_infinity(group, point))) {
|
|
OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
|
|
return 0;
|
|
}
|
|
|
|
fiat_p256_felem z1, z2;
|
|
fiat_p256_from_generic(z1, &point->Z);
|
|
fiat_p256_inv_square(z2, z1);
|
|
|
|
if (x_out != NULL) {
|
|
fiat_p256_felem x;
|
|
fiat_p256_from_generic(x, &point->X);
|
|
fiat_p256_mul(x, x, z2);
|
|
fiat_p256_to_generic(x_out, x);
|
|
}
|
|
|
|
if (y_out != NULL) {
|
|
fiat_p256_felem y;
|
|
fiat_p256_from_generic(y, &point->Y);
|
|
fiat_p256_square(z2, z2); // z^-4
|
|
fiat_p256_mul(y, y, z1); // y * z
|
|
fiat_p256_mul(y, y, z2); // y * z^-3
|
|
fiat_p256_to_generic(y_out, y);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
// ec_GFp_nistp256_add sets |r| = |a| + |b|. Both inputs are converted to Fiat
// field elements, added as two Jacobian points, and the sum is converted back.
static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_JACOBIAN *r,
                                const EC_JACOBIAN *a, const EC_JACOBIAN *b) {
  // |lhs| starts as |a| and is overwritten with the sum; |rhs| holds |b|.
  fiat_p256_felem lhs[3], rhs[3];

  fiat_p256_from_generic(lhs[0], &a->X);
  fiat_p256_from_generic(lhs[1], &a->Y);
  fiat_p256_from_generic(lhs[2], &a->Z);

  fiat_p256_from_generic(rhs[0], &b->X);
  fiat_p256_from_generic(rhs[1], &b->Y);
  fiat_p256_from_generic(rhs[2], &b->Z);

  fiat_p256_point_add(lhs[0], lhs[1], lhs[2],
                      lhs[0], lhs[1], lhs[2],
                      0 /* both Jacobian */,
                      rhs[0], rhs[1], rhs[2]);

  fiat_p256_to_generic(&r->X, lhs[0]);
  fiat_p256_to_generic(&r->Y, lhs[1]);
  fiat_p256_to_generic(&r->Z, lhs[2]);
}
|
|
|
|
// ec_GFp_nistp256_dbl sets |r| = 2 * |a|. The point is converted to Fiat field
// elements, doubled in place, and converted back.
static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_JACOBIAN *r,
                                const EC_JACOBIAN *a) {
  fiat_p256_felem pt[3];

  fiat_p256_from_generic(pt[0], &a->X);
  fiat_p256_from_generic(pt[1], &a->Y);
  fiat_p256_from_generic(pt[2], &a->Z);

  // Input and output share the same buffers.
  fiat_p256_point_double(pt[0], pt[1], pt[2], pt[0], pt[1], pt[2]);

  fiat_p256_to_generic(&r->X, pt[0]);
  fiat_p256_to_generic(&r->Y, pt[1]);
  fiat_p256_to_generic(&r->Z, pt[2]);
}
|
|
|
|
static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
|
|
const EC_JACOBIAN *p,
|
|
const EC_SCALAR *scalar) {
|
|
fiat_p256_felem res[3], tmp[3];
|
|
fiat_p256_from_generic(tmp[0], &p->X);
|
|
fiat_p256_from_generic(tmp[1], &p->Y);
|
|
fiat_p256_from_generic(tmp[2], &p->Z);
|
|
|
|
ec_nistp_scalar_mul(p256_methods(), res[0], res[1], res[2], tmp[0], tmp[1], tmp[2], scalar);
|
|
|
|
fiat_p256_to_generic(&r->X, res[0]);
|
|
fiat_p256_to_generic(&r->Y, res[1]);
|
|
fiat_p256_to_generic(&r->Z, res[2]);
|
|
}
|
|
|
|
static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
|
|
EC_JACOBIAN *r,
|
|
const EC_SCALAR *scalar) {
|
|
fiat_p256_felem res[3];
|
|
|
|
ec_nistp_scalar_mul_base(p256_methods(), res[0], res[1], res[2], scalar);
|
|
|
|
fiat_p256_to_generic(&r->X, res[0]);
|
|
fiat_p256_to_generic(&r->Y, res[1]);
|
|
fiat_p256_to_generic(&r->Z, res[2]);
|
|
}
|
|
|
|
static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
|
|
EC_JACOBIAN *r,
|
|
const EC_SCALAR *g_scalar,
|
|
const EC_JACOBIAN *p,
|
|
const EC_SCALAR *p_scalar) {
|
|
fiat_p256_felem res[3], tmp[3];
|
|
fiat_p256_from_generic(tmp[0], &p->X);
|
|
fiat_p256_from_generic(tmp[1], &p->Y);
|
|
fiat_p256_from_generic(tmp[2], &p->Z);
|
|
|
|
ec_nistp_scalar_mul_public(p256_methods(), res[0], res[1], res[2], g_scalar, tmp[0], tmp[1], tmp[2], p_scalar);
|
|
|
|
fiat_p256_to_generic(&r->X, res[0]);
|
|
fiat_p256_to_generic(&r->Y, res[1]);
|
|
fiat_p256_to_generic(&r->Z, res[2]);
|
|
}
|
|
|
|
static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group,
|
|
const EC_JACOBIAN *p,
|
|
const EC_SCALAR *r) {
|
|
if (ec_GFp_simple_is_at_infinity(group, p)) {
|
|
return 0;
|
|
}
|
|
|
|
// We wish to compare X/Z^2 with r. This is equivalent to comparing X with
|
|
// r*Z^2. Note that X and Z are represented in Montgomery form, while r is
|
|
// not.
|
|
fiat_p256_felem Z2_mont;
|
|
fiat_p256_from_generic(Z2_mont, &p->Z);
|
|
fiat_p256_mul(Z2_mont, Z2_mont, Z2_mont);
|
|
|
|
fiat_p256_felem r_Z2;
|
|
fiat_p256_from_words(r_Z2, r->words); // r < order < p, so this is valid.
|
|
fiat_p256_mul(r_Z2, r_Z2, Z2_mont);
|
|
|
|
fiat_p256_felem X;
|
|
fiat_p256_from_generic(X, &p->X);
|
|
fiat_p256_from_montgomery(X, X);
|
|
|
|
if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
|
|
return 1;
|
|
}
|
|
|
|
// During signing the x coefficient is reduced modulo the group order.
|
|
// Therefore there is a small possibility, less than 1/2^128, that group_order
|
|
// < p.x < P. in that case we need not only to compare against |r| but also to
|
|
// compare against r+group_order.
|
|
assert(group->field.N.width == group->order.N.width);
|
|
EC_FELEM tmp;
|
|
BN_ULONG carry =
|
|
bn_add_words(tmp.words, r->words, group->order.N.d, group->field.N.width);
|
|
if (carry == 0 &&
|
|
bn_less_than_words(tmp.words, group->field.N.d, group->field.N.width)) {
|
|
fiat_p256_from_generic(r_Z2, &tmp);
|
|
fiat_p256_mul(r_Z2, r_Z2, Z2_mont);
|
|
if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
|
|
out->point_get_affine_coordinates =
|
|
ec_GFp_nistp256_point_get_affine_coordinates;
|
|
out->add = ec_GFp_nistp256_add;
|
|
out->dbl = ec_GFp_nistp256_dbl;
|
|
out->mul = ec_GFp_nistp256_point_mul;
|
|
out->mul_base = ec_GFp_nistp256_point_mul_base;
|
|
out->mul_public = ec_GFp_nistp256_point_mul_public;
|
|
out->felem_mul = ec_GFp_mont_felem_mul;
|
|
out->felem_sqr = ec_GFp_mont_felem_sqr;
|
|
out->felem_to_bytes = ec_GFp_mont_felem_to_bytes;
|
|
out->felem_from_bytes = ec_GFp_mont_felem_from_bytes;
|
|
out->felem_reduce = ec_GFp_mont_felem_reduce;
|
|
// TODO(davidben): This should use the specialized field arithmetic
|
|
// implementation, rather than the generic one.
|
|
out->felem_exp = ec_GFp_mont_felem_exp;
|
|
out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery;
|
|
out->scalar_to_montgomery_inv_vartime =
|
|
ec_simple_scalar_to_montgomery_inv_vartime;
|
|
out->cmp_x_coordinate = ec_GFp_nistp256_cmp_x_coordinate;
|
|
}
|
|
|
|
// BORINGSSL_NISTP256_64BIT is defined near the top of this file when
// BORINGSSL_HAS_UINT128 is available. It is removed here, presumably so the
// macro does not stay defined past the end of this file -- TODO confirm.
#undef BORINGSSL_NISTP256_64BIT
|