chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

15
vendor/ring/third_party/fiat/LICENSE vendored Normal file
View File

@@ -0,0 +1,15 @@
The Apache License, Version 2.0 (Apache-2.0)
Copyright 2015-2020 the fiat-crypto authors (see the AUTHORS file)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,178 @@
#include <ring-core/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
(defined(__APPLE__) || defined(__ELF__))
.intel_syntax noprefix
.text
#if defined(__APPLE__)
.private_extern _fiat_curve25519_adx_mul
.global _fiat_curve25519_adx_mul
_fiat_curve25519_adx_mul:
#else
.type fiat_curve25519_adx_mul, @function
.hidden fiat_curve25519_adx_mul
.global fiat_curve25519_adx_mul
fiat_curve25519_adx_mul:
#endif
.cfi_startproc
_CET_ENDBR
push rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp, -16
mov rbp, rsp
mov rax, rdx
mov rdx, [ rsi + 0x18 ]
mulx r11, r10, [ rax + 0x8 ]
mov rdx, [ rax + 0x0 ]
mov [ rsp - 0x58 ], r15
.cfi_offset r15, -16-0x58
mulx r8, rcx, [ rsi + 0x18 ]
mov rdx, [ rsi + 0x8 ]
mov [ rsp - 0x80 ], rbx
.cfi_offset rbx, -16-0x80
mulx rbx, r9, [ rax + 0x18 ]
mov rdx, [ rsi + 0x8 ]
mov [ rsp - 0x70 ], r12
.cfi_offset r12, -16-0x70
mulx r15, r12, [ rax + 0x8 ]
mov rdx, [ rsi + 0x0 ]
mov [ rsp - 0x68 ], r13
.cfi_offset r13, -16-0x68
mov [ rsp - 0x60 ], r14
.cfi_offset r14, -16-0x60
mulx r14, r13, [ rax + 0x0 ]
mov rdx, [ rax + 0x10 ]
mov [ rsp - 0x18 ], r15
mov [ rsp - 0x50 ], rdi
mulx rdi, r15, [ rsi + 0x0 ]
mov rdx, [ rax + 0x18 ]
mov [ rsp - 0x48 ], r13
mov [ rsp - 0x40 ], r9
mulx r9, r13, [ rsi + 0x0 ]
test al, al
adox rcx, rdi
mov rdx, [ rsi + 0x10 ]
mov [ rsp - 0x38 ], r13
mulx r13, rdi, [ rax + 0x8 ]
adox r10, r9
mov rdx, 0x0
adox rbx, rdx
adcx rdi, rcx
adcx r8, r10
mov r9, rdx
adcx r9, rbx
mov rdx, [ rsi + 0x10 ]
mulx r10, rcx, [ rax + 0x0 ]
mov rdx, [ rsi + 0x0 ]
mov [ rsp - 0x30 ], r15
mulx r15, rbx, [ rax + 0x8 ]
mov rdx, -0x2
inc rdx
adox rcx, r15
setc r15b
clc
adcx rcx, r12
adox r10, rdi
mov rdx, [ rax + 0x10 ]
mov [ rsp - 0x78 ], rcx
mulx rcx, rdi, [ rsi + 0x10 ]
adox rdi, r8
mov rdx, [ rax + 0x18 ]
mov [ rsp - 0x28 ], rcx
mulx rcx, r8, [ rsi + 0x10 ]
mov rdx, [ rax + 0x10 ]
mov [ rsp - 0x20 ], r8
mulx r12, r8, [ rsi + 0x18 ]
adox r8, r9
mov rdx, [ rsi + 0x8 ]
mov [ rsp - 0x10 ], r12
mulx r12, r9, [ rax + 0x10 ]
movzx rdx, r15b
lea rdx, [ rdx + rcx ]
adcx r9, r10
adcx r13, rdi
mov r15, 0x0
mov r10, r15
adox r10, rdx
mov rdx, [ rax + 0x18 ]
mulx rcx, rdi, [ rsi + 0x18 ]
adox rcx, r15
adcx r11, r8
mov rdx, r15
adcx rdx, r10
adcx rcx, r15
mov r8, rdx
mov rdx, [ rax + 0x0 ]
mulx r15, r10, [ rsi + 0x8 ]
test al, al
adox r10, r14
adcx rbx, r10
adox r15, [ rsp - 0x78 ]
adcx r15, [ rsp - 0x30 ]
adox r9, [ rsp - 0x18 ]
adcx r9, [ rsp - 0x38 ]
adox r13, [ rsp - 0x40 ]
adcx r12, r13
adox r11, [ rsp - 0x20 ]
adcx r11, [ rsp - 0x28 ]
mov rdx, 0x26
mulx rsi, r14, r12
adox rdi, r8
adcx rdi, [ rsp - 0x10 ]
mulx r10, r8, r11
mov r13, 0x0
adox rcx, r13
adcx rcx, r13
mulx r11, r12, rdi
xor rdi, rdi
adox r8, rbx
adox r12, r15
mulx rbx, r13, rcx
adcx r14, [ rsp - 0x48 ]
adox r13, r9
adox rbx, rdi
adcx rsi, r8
adcx r10, r12
adcx r11, r13
adc rbx, 0x0
mulx r9, r15, rbx
xor r9, r9
adox r15, r14
mov rdi, r9
adox rdi, rsi
mov rcx, r9
adox rcx, r10
mov r8, [ rsp - 0x50 ]
mov [ r8 + 0x8 ], rdi
mov r12, r9
adox r12, r11
mov r14, r9
cmovo r14, rdx
mov [ r8 + 0x18 ], r12
adcx r15, r14
mov [ r8 + 0x0 ], r15
mov [ r8 + 0x10 ], rcx
mov rbx, [ rsp - 0x80 ]
.cfi_restore rbx
mov r12, [ rsp - 0x70 ]
.cfi_restore r12
mov r13, [ rsp - 0x68 ]
.cfi_restore r13
mov r14, [ rsp - 0x60 ]
.cfi_restore r14
mov r15, [ rsp - 0x58 ]
.cfi_restore r15
pop rbp
.cfi_restore rbp
.cfi_adjust_cfa_offset -8
ret
.cfi_endproc
#if defined(__ELF__)
.size fiat_curve25519_adx_mul, .-fiat_curve25519_adx_mul
#endif
#endif

View File

@@ -0,0 +1,146 @@
#include <ring-core/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
(defined(__APPLE__) || defined(__ELF__))
.intel_syntax noprefix
.text
#if defined(__APPLE__)
.private_extern _fiat_curve25519_adx_square
.global _fiat_curve25519_adx_square
_fiat_curve25519_adx_square:
#else
.type fiat_curve25519_adx_square, @function
.hidden fiat_curve25519_adx_square
.global fiat_curve25519_adx_square
fiat_curve25519_adx_square:
#endif
.cfi_startproc
_CET_ENDBR
push rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp, -16
mov rbp, rsp
mov rdx, [ rsi + 0x0 ]
mulx r10, rax, [ rsi + 0x8 ]
mov rdx, [ rsi + 0x0 ]
mulx rcx, r11, [ rsi + 0x10 ]
xor rdx, rdx
adox r11, r10
mov rdx, [ rsi + 0x0 ]
mulx r9, r8, [ rsi + 0x18 ]
mov rdx, [ rsi + 0x8 ]
mov [ rsp - 0x80 ], rbx
.cfi_offset rbx, -16-0x80
mulx rbx, r10, [ rsi + 0x18 ]
adox r8, rcx
mov [rsp - 0x48 ], rdi
adox r10, r9
adcx rax, rax
mov rdx, [ rsi + 0x10 ]
mulx r9, rcx, [ rsi + 0x18 ]
adox rcx, rbx
mov rdx, [ rsi + 0x10 ]
mulx rdi, rbx, [ rsi + 0x8 ]
mov rdx, 0x0
adox r9, rdx
mov [ rsp - 0x70 ], r12
.cfi_offset r12, -16-0x70
mov r12, -0x3
inc r12
adox rbx, r8
adox rdi, r10
adcx r11, r11
mov r8, rdx
adox r8, rcx
mov r10, rdx
adox r10, r9
adcx rbx, rbx
mov rdx, [ rsi + 0x0 ]
mulx r9, rcx, rdx
mov rdx, [ rsi + 0x8 ]
mov [ rsp - 0x68 ], r13
.cfi_offset r13, -16-0x68
mov [ rsp - 0x60 ], r14
.cfi_offset r14, -16-0x60
mulx r14, r13, rdx
seto dl
inc r12
adox r9, rax
adox r13, r11
adox r14, rbx
adcx rdi, rdi
mov al, dl
mov rdx, [ rsi + 0x10 ]
mulx rbx, r11, rdx
adox r11, rdi
adcx r8, r8
adox rbx, r8
adcx r10, r10
movzx rdx, al
mov rdi, 0x0
adcx rdx, rdi
movzx r8, al
lea r8, [ r8 + rdx ]
mov rdx, [ rsi + 0x18 ]
mulx rdi, rax, rdx
adox rax, r10
mov rdx, 0x26
mov [ rsp - 0x58 ], r15
.cfi_offset r15, -16-0x58
mulx r15, r10, r11
clc
adcx r10, rcx
mulx r11, rcx, rbx
adox r8, rdi
mulx rdi, rbx, r8
inc r12
adox rcx, r9
mulx r8, r9, rax
adcx r15, rcx
adox r9, r13
adcx r11, r9
adox rbx, r14
adox rdi, r12
adcx r8, rbx
adc rdi, 0x0
mulx r14, r13, rdi
test al, al
mov rdi, [ rsp - 0x48 ]
adox r13, r10
mov r14, r12
adox r14, r15
mov [ rdi + 0x8 ], r14
mov rax, r12
adox rax, r11
mov r10, r12
adox r10, r8
mov [ rdi + 0x10 ], rax
mov rcx, r12
cmovo rcx, rdx
adcx r13, rcx
mov [ rdi + 0x0 ], r13
mov [ rdi + 0x18 ], r10
mov rbx, [ rsp - 0x80 ]
.cfi_restore rbx
mov r12, [ rsp - 0x70 ]
.cfi_restore r12
mov r13, [ rsp - 0x68 ]
.cfi_restore r13
mov r14, [ rsp - 0x60 ]
.cfi_restore r14
mov r15, [ rsp - 0x58 ]
.cfi_restore r15
pop rbp
.cfi_restore rbp
.cfi_adjust_cfa_offset -8
ret
.cfi_endproc
#if defined(__ELF__)
.size fiat_curve25519_adx_square, .-fiat_curve25519_adx_square
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,916 @@
/* Autogenerated: 'src/ExtractionOCaml/unsaturated_solinas' --inline --static --use-value-barrier 25519 64 '(auto)' '2^255 - 19' carry_mul carry_square carry add sub opp selectznz to_bytes from_bytes relax carry_scmul121666 */
/* curve description: 25519 */
/* machine_wordsize = 64 (from "64") */
/* requested operations: carry_mul, carry_square, carry, add, sub, opp, selectznz, to_bytes, from_bytes, relax, carry_scmul121666 */
/* n = 5 (from "(auto)") */
/* s-c = 2^255 - [(1, 19)] (from "2^255 - 19") */
/* tight_bounds_multiplier = 1 (from "") */
/* */
/* Computed values: */
/* carry_chain = [0, 1, 2, 3, 4, 0, 1] */
/* eval z = z[0] + (z[1] << 51) + (z[2] << 102) + (z[3] << 153) + (z[4] << 204) */
/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */
/* balance = [0xfffffffffffda, 0xffffffffffffe, 0xffffffffffffe, 0xffffffffffffe, 0xffffffffffffe] */
#include <stdint.h>
typedef unsigned char fiat_25519_uint1;
typedef signed char fiat_25519_int1;
#if defined(__GNUC__) || defined(__clang__)
# define FIAT_25519_FIAT_EXTENSION __extension__
# define FIAT_25519_FIAT_INLINE __inline__
#else
# define FIAT_25519_FIAT_EXTENSION
# define FIAT_25519_FIAT_INLINE
#endif
FIAT_25519_FIAT_EXTENSION typedef signed __int128 fiat_25519_int128;
FIAT_25519_FIAT_EXTENSION typedef unsigned __int128 fiat_25519_uint128;
/* The type fiat_25519_loose_field_element is a field element with loose bounds. */
/* Bounds: [[0x0 ~> 0x18000000000000], [0x0 ~> 0x18000000000000], [0x0 ~> 0x18000000000000], [0x0 ~> 0x18000000000000], [0x0 ~> 0x18000000000000]] */
typedef uint64_t fiat_25519_loose_field_element[5];
/* The type fiat_25519_tight_field_element is a field element with tight bounds. */
/* Bounds: [[0x0 ~> 0x8000000000000], [0x0 ~> 0x8000000000000], [0x0 ~> 0x8000000000000], [0x0 ~> 0x8000000000000], [0x0 ~> 0x8000000000000]] */
typedef uint64_t fiat_25519_tight_field_element[5];
#if (-1 & 3) != 3
#error "This code only works on a two's complement system"
#endif
#if !defined(FIAT_25519_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
static __inline__ uint64_t fiat_25519_value_barrier_u64(uint64_t a) {
__asm__("" : "+r"(a) : /* no inputs */);
return a;
}
#else
# define fiat_25519_value_barrier_u64(x) (x)
#endif
/*
* The function fiat_25519_addcarryx_u51 is an addition with carry.
*
* Postconditions:
* out1 = (arg1 + arg2 + arg3) mod 2^51
* out2 = ⌊(arg1 + arg2 + arg3) / 2^51⌋
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [0x0 ~> 0x7ffffffffffff]
* arg3: [0x0 ~> 0x7ffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0x7ffffffffffff]
* out2: [0x0 ~> 0x1]
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_addcarryx_u51(uint64_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
uint64_t x1;
uint64_t x2;
fiat_25519_uint1 x3;
x1 = ((arg1 + arg2) + arg3);
x2 = (x1 & UINT64_C(0x7ffffffffffff));
x3 = (fiat_25519_uint1)(x1 >> 51);
*out1 = x2;
*out2 = x3;
}
/*
* The function fiat_25519_subborrowx_u51 is a subtraction with borrow.
*
* Postconditions:
* out1 = (-arg1 + arg2 + -arg3) mod 2^51
* out2 = -⌊(-arg1 + arg2 + -arg3) / 2^51⌋
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [0x0 ~> 0x7ffffffffffff]
* arg3: [0x0 ~> 0x7ffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0x7ffffffffffff]
* out2: [0x0 ~> 0x1]
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_subborrowx_u51(uint64_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
int64_t x1;
fiat_25519_int1 x2;
uint64_t x3;
x1 = ((int64_t)(arg2 - (int64_t)arg1) - (int64_t)arg3);
x2 = (fiat_25519_int1)(x1 >> 51);
x3 = (x1 & UINT64_C(0x7ffffffffffff));
*out1 = x3;
*out2 = (fiat_25519_uint1)(0x0 - x2);
}
/*
* The function fiat_25519_cmovznz_u64 is a single-word conditional move.
*
* Postconditions:
* out1 = (if arg1 = 0 then arg2 else arg3)
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [0x0 ~> 0xffffffffffffffff]
* arg3: [0x0 ~> 0xffffffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_cmovznz_u64(uint64_t* out1, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
fiat_25519_uint1 x1;
uint64_t x2;
uint64_t x3;
x1 = (!(!arg1));
x2 = ((fiat_25519_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
x3 = ((fiat_25519_value_barrier_u64(x2) & arg3) | (fiat_25519_value_barrier_u64((~x2)) & arg2));
*out1 = x3;
}
/*
* The function fiat_25519_carry_mul multiplies two field elements and reduces the result.
*
* Postconditions:
* eval out1 mod m = (eval arg1 * eval arg2) mod m
*
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_carry_mul(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1, const fiat_25519_loose_field_element arg2) {
fiat_25519_uint128 x1;
fiat_25519_uint128 x2;
fiat_25519_uint128 x3;
fiat_25519_uint128 x4;
fiat_25519_uint128 x5;
fiat_25519_uint128 x6;
fiat_25519_uint128 x7;
fiat_25519_uint128 x8;
fiat_25519_uint128 x9;
fiat_25519_uint128 x10;
fiat_25519_uint128 x11;
fiat_25519_uint128 x12;
fiat_25519_uint128 x13;
fiat_25519_uint128 x14;
fiat_25519_uint128 x15;
fiat_25519_uint128 x16;
fiat_25519_uint128 x17;
fiat_25519_uint128 x18;
fiat_25519_uint128 x19;
fiat_25519_uint128 x20;
fiat_25519_uint128 x21;
fiat_25519_uint128 x22;
fiat_25519_uint128 x23;
fiat_25519_uint128 x24;
fiat_25519_uint128 x25;
fiat_25519_uint128 x26;
uint64_t x27;
uint64_t x28;
fiat_25519_uint128 x29;
fiat_25519_uint128 x30;
fiat_25519_uint128 x31;
fiat_25519_uint128 x32;
fiat_25519_uint128 x33;
uint64_t x34;
uint64_t x35;
fiat_25519_uint128 x36;
uint64_t x37;
uint64_t x38;
fiat_25519_uint128 x39;
uint64_t x40;
uint64_t x41;
fiat_25519_uint128 x42;
uint64_t x43;
uint64_t x44;
uint64_t x45;
uint64_t x46;
uint64_t x47;
uint64_t x48;
uint64_t x49;
fiat_25519_uint1 x50;
uint64_t x51;
uint64_t x52;
x1 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[4]) * UINT8_C(0x13)));
x2 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[3]) * UINT8_C(0x13)));
x3 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[2]) * UINT8_C(0x13)));
x4 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[1]) * UINT8_C(0x13)));
x5 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[4]) * UINT8_C(0x13)));
x6 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[3]) * UINT8_C(0x13)));
x7 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[2]) * UINT8_C(0x13)));
x8 = ((fiat_25519_uint128)(arg1[2]) * ((arg2[4]) * UINT8_C(0x13)));
x9 = ((fiat_25519_uint128)(arg1[2]) * ((arg2[3]) * UINT8_C(0x13)));
x10 = ((fiat_25519_uint128)(arg1[1]) * ((arg2[4]) * UINT8_C(0x13)));
x11 = ((fiat_25519_uint128)(arg1[4]) * (arg2[0]));
x12 = ((fiat_25519_uint128)(arg1[3]) * (arg2[1]));
x13 = ((fiat_25519_uint128)(arg1[3]) * (arg2[0]));
x14 = ((fiat_25519_uint128)(arg1[2]) * (arg2[2]));
x15 = ((fiat_25519_uint128)(arg1[2]) * (arg2[1]));
x16 = ((fiat_25519_uint128)(arg1[2]) * (arg2[0]));
x17 = ((fiat_25519_uint128)(arg1[1]) * (arg2[3]));
x18 = ((fiat_25519_uint128)(arg1[1]) * (arg2[2]));
x19 = ((fiat_25519_uint128)(arg1[1]) * (arg2[1]));
x20 = ((fiat_25519_uint128)(arg1[1]) * (arg2[0]));
x21 = ((fiat_25519_uint128)(arg1[0]) * (arg2[4]));
x22 = ((fiat_25519_uint128)(arg1[0]) * (arg2[3]));
x23 = ((fiat_25519_uint128)(arg1[0]) * (arg2[2]));
x24 = ((fiat_25519_uint128)(arg1[0]) * (arg2[1]));
x25 = ((fiat_25519_uint128)(arg1[0]) * (arg2[0]));
x26 = (x25 + (x10 + (x9 + (x7 + x4))));
x27 = (uint64_t)(x26 >> 51);
x28 = (uint64_t)(x26 & UINT64_C(0x7ffffffffffff));
x29 = (x21 + (x17 + (x14 + (x12 + x11))));
x30 = (x22 + (x18 + (x15 + (x13 + x1))));
x31 = (x23 + (x19 + (x16 + (x5 + x2))));
x32 = (x24 + (x20 + (x8 + (x6 + x3))));
x33 = (x27 + x32);
x34 = (uint64_t)(x33 >> 51);
x35 = (uint64_t)(x33 & UINT64_C(0x7ffffffffffff));
x36 = (x34 + x31);
x37 = (uint64_t)(x36 >> 51);
x38 = (uint64_t)(x36 & UINT64_C(0x7ffffffffffff));
x39 = (x37 + x30);
x40 = (uint64_t)(x39 >> 51);
x41 = (uint64_t)(x39 & UINT64_C(0x7ffffffffffff));
x42 = (x40 + x29);
x43 = (uint64_t)(x42 >> 51);
x44 = (uint64_t)(x42 & UINT64_C(0x7ffffffffffff));
x45 = (x43 * UINT8_C(0x13));
x46 = (x28 + x45);
x47 = (x46 >> 51);
x48 = (x46 & UINT64_C(0x7ffffffffffff));
x49 = (x47 + x35);
x50 = (fiat_25519_uint1)(x49 >> 51);
x51 = (x49 & UINT64_C(0x7ffffffffffff));
x52 = (x50 + x38);
out1[0] = x48;
out1[1] = x51;
out1[2] = x52;
out1[3] = x41;
out1[4] = x44;
}
/*
* The function fiat_25519_carry_square squares a field element and reduces the result.
*
* Postconditions:
* eval out1 mod m = (eval arg1 * eval arg1) mod m
*
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_carry_square(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
uint64_t x6;
uint64_t x7;
uint64_t x8;
fiat_25519_uint128 x9;
fiat_25519_uint128 x10;
fiat_25519_uint128 x11;
fiat_25519_uint128 x12;
fiat_25519_uint128 x13;
fiat_25519_uint128 x14;
fiat_25519_uint128 x15;
fiat_25519_uint128 x16;
fiat_25519_uint128 x17;
fiat_25519_uint128 x18;
fiat_25519_uint128 x19;
fiat_25519_uint128 x20;
fiat_25519_uint128 x21;
fiat_25519_uint128 x22;
fiat_25519_uint128 x23;
fiat_25519_uint128 x24;
uint64_t x25;
uint64_t x26;
fiat_25519_uint128 x27;
fiat_25519_uint128 x28;
fiat_25519_uint128 x29;
fiat_25519_uint128 x30;
fiat_25519_uint128 x31;
uint64_t x32;
uint64_t x33;
fiat_25519_uint128 x34;
uint64_t x35;
uint64_t x36;
fiat_25519_uint128 x37;
uint64_t x38;
uint64_t x39;
fiat_25519_uint128 x40;
uint64_t x41;
uint64_t x42;
uint64_t x43;
uint64_t x44;
uint64_t x45;
uint64_t x46;
uint64_t x47;
fiat_25519_uint1 x48;
uint64_t x49;
uint64_t x50;
x1 = ((arg1[4]) * UINT8_C(0x13));
x2 = (x1 * 0x2);
x3 = ((arg1[4]) * 0x2);
x4 = ((arg1[3]) * UINT8_C(0x13));
x5 = (x4 * 0x2);
x6 = ((arg1[3]) * 0x2);
x7 = ((arg1[2]) * 0x2);
x8 = ((arg1[1]) * 0x2);
x9 = ((fiat_25519_uint128)(arg1[4]) * x1);
x10 = ((fiat_25519_uint128)(arg1[3]) * x2);
x11 = ((fiat_25519_uint128)(arg1[3]) * x4);
x12 = ((fiat_25519_uint128)(arg1[2]) * x2);
x13 = ((fiat_25519_uint128)(arg1[2]) * x5);
x14 = ((fiat_25519_uint128)(arg1[2]) * (arg1[2]));
x15 = ((fiat_25519_uint128)(arg1[1]) * x2);
x16 = ((fiat_25519_uint128)(arg1[1]) * x6);
x17 = ((fiat_25519_uint128)(arg1[1]) * x7);
x18 = ((fiat_25519_uint128)(arg1[1]) * (arg1[1]));
x19 = ((fiat_25519_uint128)(arg1[0]) * x3);
x20 = ((fiat_25519_uint128)(arg1[0]) * x6);
x21 = ((fiat_25519_uint128)(arg1[0]) * x7);
x22 = ((fiat_25519_uint128)(arg1[0]) * x8);
x23 = ((fiat_25519_uint128)(arg1[0]) * (arg1[0]));
x24 = (x23 + (x15 + x13));
x25 = (uint64_t)(x24 >> 51);
x26 = (uint64_t)(x24 & UINT64_C(0x7ffffffffffff));
x27 = (x19 + (x16 + x14));
x28 = (x20 + (x17 + x9));
x29 = (x21 + (x18 + x10));
x30 = (x22 + (x12 + x11));
x31 = (x25 + x30);
x32 = (uint64_t)(x31 >> 51);
x33 = (uint64_t)(x31 & UINT64_C(0x7ffffffffffff));
x34 = (x32 + x29);
x35 = (uint64_t)(x34 >> 51);
x36 = (uint64_t)(x34 & UINT64_C(0x7ffffffffffff));
x37 = (x35 + x28);
x38 = (uint64_t)(x37 >> 51);
x39 = (uint64_t)(x37 & UINT64_C(0x7ffffffffffff));
x40 = (x38 + x27);
x41 = (uint64_t)(x40 >> 51);
x42 = (uint64_t)(x40 & UINT64_C(0x7ffffffffffff));
x43 = (x41 * UINT8_C(0x13));
x44 = (x26 + x43);
x45 = (x44 >> 51);
x46 = (x44 & UINT64_C(0x7ffffffffffff));
x47 = (x45 + x33);
x48 = (fiat_25519_uint1)(x47 >> 51);
x49 = (x47 & UINT64_C(0x7ffffffffffff));
x50 = (x48 + x36);
out1[0] = x46;
out1[1] = x49;
out1[2] = x50;
out1[3] = x39;
out1[4] = x42;
}
/*
* The function fiat_25519_carry reduces a field element.
*
* Postconditions:
* eval out1 mod m = eval arg1 mod m
*
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_carry(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
uint64_t x6;
uint64_t x7;
uint64_t x8;
uint64_t x9;
uint64_t x10;
uint64_t x11;
uint64_t x12;
x1 = (arg1[0]);
x2 = ((x1 >> 51) + (arg1[1]));
x3 = ((x2 >> 51) + (arg1[2]));
x4 = ((x3 >> 51) + (arg1[3]));
x5 = ((x4 >> 51) + (arg1[4]));
x6 = ((x1 & UINT64_C(0x7ffffffffffff)) + ((x5 >> 51) * UINT8_C(0x13)));
x7 = ((fiat_25519_uint1)(x6 >> 51) + (x2 & UINT64_C(0x7ffffffffffff)));
x8 = (x6 & UINT64_C(0x7ffffffffffff));
x9 = (x7 & UINT64_C(0x7ffffffffffff));
x10 = ((fiat_25519_uint1)(x7 >> 51) + (x3 & UINT64_C(0x7ffffffffffff)));
x11 = (x4 & UINT64_C(0x7ffffffffffff));
x12 = (x5 & UINT64_C(0x7ffffffffffff));
out1[0] = x8;
out1[1] = x9;
out1[2] = x10;
out1[3] = x11;
out1[4] = x12;
}
/*
* The function fiat_25519_add adds two field elements.
*
* Postconditions:
* eval out1 mod m = (eval arg1 + eval arg2) mod m
*
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_add(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1, const fiat_25519_tight_field_element arg2) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
x1 = ((arg1[0]) + (arg2[0]));
x2 = ((arg1[1]) + (arg2[1]));
x3 = ((arg1[2]) + (arg2[2]));
x4 = ((arg1[3]) + (arg2[3]));
x5 = ((arg1[4]) + (arg2[4]));
out1[0] = x1;
out1[1] = x2;
out1[2] = x3;
out1[3] = x4;
out1[4] = x5;
}
/*
* The function fiat_25519_sub subtracts two field elements.
*
* Postconditions:
* eval out1 mod m = (eval arg1 - eval arg2) mod m
*
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_sub(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1, const fiat_25519_tight_field_element arg2) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
x1 = ((UINT64_C(0xfffffffffffda) + (arg1[0])) - (arg2[0]));
x2 = ((UINT64_C(0xffffffffffffe) + (arg1[1])) - (arg2[1]));
x3 = ((UINT64_C(0xffffffffffffe) + (arg1[2])) - (arg2[2]));
x4 = ((UINT64_C(0xffffffffffffe) + (arg1[3])) - (arg2[3]));
x5 = ((UINT64_C(0xffffffffffffe) + (arg1[4])) - (arg2[4]));
out1[0] = x1;
out1[1] = x2;
out1[2] = x3;
out1[3] = x4;
out1[4] = x5;
}
/*
* The function fiat_25519_opp negates a field element.
*
* Postconditions:
* eval out1 mod m = -eval arg1 mod m
*
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_opp(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
x1 = (UINT64_C(0xfffffffffffda) - (arg1[0]));
x2 = (UINT64_C(0xffffffffffffe) - (arg1[1]));
x3 = (UINT64_C(0xffffffffffffe) - (arg1[2]));
x4 = (UINT64_C(0xffffffffffffe) - (arg1[3]));
x5 = (UINT64_C(0xffffffffffffe) - (arg1[4]));
out1[0] = x1;
out1[1] = x2;
out1[2] = x3;
out1[3] = x4;
out1[4] = x5;
}
/*
* The function fiat_25519_to_bytes serializes a field element to bytes in little-endian order.
*
* Postconditions:
* out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
*
* Output Bounds:
* out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]]
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_to_bytes(uint8_t out1[32], const fiat_25519_tight_field_element arg1) {
uint64_t x1;
fiat_25519_uint1 x2;
uint64_t x3;
fiat_25519_uint1 x4;
uint64_t x5;
fiat_25519_uint1 x6;
uint64_t x7;
fiat_25519_uint1 x8;
uint64_t x9;
fiat_25519_uint1 x10;
uint64_t x11;
uint64_t x12;
fiat_25519_uint1 x13;
uint64_t x14;
fiat_25519_uint1 x15;
uint64_t x16;
fiat_25519_uint1 x17;
uint64_t x18;
fiat_25519_uint1 x19;
uint64_t x20;
fiat_25519_uint1 x21;
uint64_t x22;
uint64_t x23;
uint64_t x24;
uint64_t x25;
uint8_t x26;
uint64_t x27;
uint8_t x28;
uint64_t x29;
uint8_t x30;
uint64_t x31;
uint8_t x32;
uint64_t x33;
uint8_t x34;
uint64_t x35;
uint8_t x36;
uint8_t x37;
uint64_t x38;
uint8_t x39;
uint64_t x40;
uint8_t x41;
uint64_t x42;
uint8_t x43;
uint64_t x44;
uint8_t x45;
uint64_t x46;
uint8_t x47;
uint64_t x48;
uint8_t x49;
uint8_t x50;
uint64_t x51;
uint8_t x52;
uint64_t x53;
uint8_t x54;
uint64_t x55;
uint8_t x56;
uint64_t x57;
uint8_t x58;
uint64_t x59;
uint8_t x60;
uint64_t x61;
uint8_t x62;
uint64_t x63;
uint8_t x64;
fiat_25519_uint1 x65;
uint64_t x66;
uint8_t x67;
uint64_t x68;
uint8_t x69;
uint64_t x70;
uint8_t x71;
uint64_t x72;
uint8_t x73;
uint64_t x74;
uint8_t x75;
uint64_t x76;
uint8_t x77;
uint8_t x78;
uint64_t x79;
uint8_t x80;
uint64_t x81;
uint8_t x82;
uint64_t x83;
uint8_t x84;
uint64_t x85;
uint8_t x86;
uint64_t x87;
uint8_t x88;
uint64_t x89;
uint8_t x90;
uint8_t x91;
fiat_25519_subborrowx_u51(&x1, &x2, 0x0, (arg1[0]), UINT64_C(0x7ffffffffffed));
fiat_25519_subborrowx_u51(&x3, &x4, x2, (arg1[1]), UINT64_C(0x7ffffffffffff));
fiat_25519_subborrowx_u51(&x5, &x6, x4, (arg1[2]), UINT64_C(0x7ffffffffffff));
fiat_25519_subborrowx_u51(&x7, &x8, x6, (arg1[3]), UINT64_C(0x7ffffffffffff));
fiat_25519_subborrowx_u51(&x9, &x10, x8, (arg1[4]), UINT64_C(0x7ffffffffffff));
fiat_25519_cmovznz_u64(&x11, x10, 0x0, UINT64_C(0xffffffffffffffff));
fiat_25519_addcarryx_u51(&x12, &x13, 0x0, x1, (x11 & UINT64_C(0x7ffffffffffed)));
fiat_25519_addcarryx_u51(&x14, &x15, x13, x3, (x11 & UINT64_C(0x7ffffffffffff)));
fiat_25519_addcarryx_u51(&x16, &x17, x15, x5, (x11 & UINT64_C(0x7ffffffffffff)));
fiat_25519_addcarryx_u51(&x18, &x19, x17, x7, (x11 & UINT64_C(0x7ffffffffffff)));
fiat_25519_addcarryx_u51(&x20, &x21, x19, x9, (x11 & UINT64_C(0x7ffffffffffff)));
x22 = (x20 << 4);
x23 = (x18 * (uint64_t)0x2);
x24 = (x16 << 6);
x25 = (x14 << 3);
x26 = (uint8_t)(x12 & UINT8_C(0xff));
x27 = (x12 >> 8);
x28 = (uint8_t)(x27 & UINT8_C(0xff));
x29 = (x27 >> 8);
x30 = (uint8_t)(x29 & UINT8_C(0xff));
x31 = (x29 >> 8);
x32 = (uint8_t)(x31 & UINT8_C(0xff));
x33 = (x31 >> 8);
x34 = (uint8_t)(x33 & UINT8_C(0xff));
x35 = (x33 >> 8);
x36 = (uint8_t)(x35 & UINT8_C(0xff));
x37 = (uint8_t)(x35 >> 8);
x38 = (x25 + (uint64_t)x37);
x39 = (uint8_t)(x38 & UINT8_C(0xff));
x40 = (x38 >> 8);
x41 = (uint8_t)(x40 & UINT8_C(0xff));
x42 = (x40 >> 8);
x43 = (uint8_t)(x42 & UINT8_C(0xff));
x44 = (x42 >> 8);
x45 = (uint8_t)(x44 & UINT8_C(0xff));
x46 = (x44 >> 8);
x47 = (uint8_t)(x46 & UINT8_C(0xff));
x48 = (x46 >> 8);
x49 = (uint8_t)(x48 & UINT8_C(0xff));
x50 = (uint8_t)(x48 >> 8);
x51 = (x24 + (uint64_t)x50);
x52 = (uint8_t)(x51 & UINT8_C(0xff));
x53 = (x51 >> 8);
x54 = (uint8_t)(x53 & UINT8_C(0xff));
x55 = (x53 >> 8);
x56 = (uint8_t)(x55 & UINT8_C(0xff));
x57 = (x55 >> 8);
x58 = (uint8_t)(x57 & UINT8_C(0xff));
x59 = (x57 >> 8);
x60 = (uint8_t)(x59 & UINT8_C(0xff));
x61 = (x59 >> 8);
x62 = (uint8_t)(x61 & UINT8_C(0xff));
x63 = (x61 >> 8);
x64 = (uint8_t)(x63 & UINT8_C(0xff));
x65 = (fiat_25519_uint1)(x63 >> 8);
x66 = (x23 + (uint64_t)x65);
x67 = (uint8_t)(x66 & UINT8_C(0xff));
x68 = (x66 >> 8);
x69 = (uint8_t)(x68 & UINT8_C(0xff));
x70 = (x68 >> 8);
x71 = (uint8_t)(x70 & UINT8_C(0xff));
x72 = (x70 >> 8);
x73 = (uint8_t)(x72 & UINT8_C(0xff));
x74 = (x72 >> 8);
x75 = (uint8_t)(x74 & UINT8_C(0xff));
x76 = (x74 >> 8);
x77 = (uint8_t)(x76 & UINT8_C(0xff));
x78 = (uint8_t)(x76 >> 8);
x79 = (x22 + (uint64_t)x78);
x80 = (uint8_t)(x79 & UINT8_C(0xff));
x81 = (x79 >> 8);
x82 = (uint8_t)(x81 & UINT8_C(0xff));
x83 = (x81 >> 8);
x84 = (uint8_t)(x83 & UINT8_C(0xff));
x85 = (x83 >> 8);
x86 = (uint8_t)(x85 & UINT8_C(0xff));
x87 = (x85 >> 8);
x88 = (uint8_t)(x87 & UINT8_C(0xff));
x89 = (x87 >> 8);
x90 = (uint8_t)(x89 & UINT8_C(0xff));
x91 = (uint8_t)(x89 >> 8);
out1[0] = x26;
out1[1] = x28;
out1[2] = x30;
out1[3] = x32;
out1[4] = x34;
out1[5] = x36;
out1[6] = x39;
out1[7] = x41;
out1[8] = x43;
out1[9] = x45;
out1[10] = x47;
out1[11] = x49;
out1[12] = x52;
out1[13] = x54;
out1[14] = x56;
out1[15] = x58;
out1[16] = x60;
out1[17] = x62;
out1[18] = x64;
out1[19] = x67;
out1[20] = x69;
out1[21] = x71;
out1[22] = x73;
out1[23] = x75;
out1[24] = x77;
out1[25] = x80;
out1[26] = x82;
out1[27] = x84;
out1[28] = x86;
out1[29] = x88;
out1[30] = x90;
out1[31] = x91;
}
/*
* The function fiat_25519_from_bytes deserializes a field element from bytes in little-endian order.
*
* Postconditions:
* eval out1 mod m = bytes_eval arg1 mod m
*
* Input Bounds:
* arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]]
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_from_bytes(fiat_25519_tight_field_element out1, const uint8_t arg1[32]) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
uint64_t x6;
uint64_t x7;
uint64_t x8;
uint64_t x9;
uint64_t x10;
uint64_t x11;
uint64_t x12;
uint64_t x13;
uint64_t x14;
uint64_t x15;
uint64_t x16;
uint64_t x17;
uint64_t x18;
uint64_t x19;
uint64_t x20;
uint64_t x21;
uint64_t x22;
uint64_t x23;
uint64_t x24;
uint64_t x25;
uint64_t x26;
uint64_t x27;
uint64_t x28;
uint64_t x29;
uint64_t x30;
uint64_t x31;
uint8_t x32;
uint64_t x33;
uint64_t x34;
uint64_t x35;
uint64_t x36;
uint64_t x37;
uint64_t x38;
uint64_t x39;
uint8_t x40;
uint64_t x41;
uint64_t x42;
uint64_t x43;
uint64_t x44;
uint64_t x45;
uint64_t x46;
uint64_t x47;
uint8_t x48;
uint64_t x49;
uint64_t x50;
uint64_t x51;
uint64_t x52;
uint64_t x53;
uint64_t x54;
uint64_t x55;
uint64_t x56;
uint8_t x57;
uint64_t x58;
uint64_t x59;
uint64_t x60;
uint64_t x61;
uint64_t x62;
uint64_t x63;
uint64_t x64;
uint8_t x65;
uint64_t x66;
uint64_t x67;
uint64_t x68;
uint64_t x69;
uint64_t x70;
uint64_t x71;
x1 = ((uint64_t)(arg1[31]) << 44);
x2 = ((uint64_t)(arg1[30]) << 36);
x3 = ((uint64_t)(arg1[29]) << 28);
x4 = ((uint64_t)(arg1[28]) << 20);
x5 = ((uint64_t)(arg1[27]) << 12);
x6 = ((uint64_t)(arg1[26]) << 4);
x7 = ((uint64_t)(arg1[25]) << 47);
x8 = ((uint64_t)(arg1[24]) << 39);
x9 = ((uint64_t)(arg1[23]) << 31);
x10 = ((uint64_t)(arg1[22]) << 23);
x11 = ((uint64_t)(arg1[21]) << 15);
x12 = ((uint64_t)(arg1[20]) << 7);
x13 = ((uint64_t)(arg1[19]) << 50);
x14 = ((uint64_t)(arg1[18]) << 42);
x15 = ((uint64_t)(arg1[17]) << 34);
x16 = ((uint64_t)(arg1[16]) << 26);
x17 = ((uint64_t)(arg1[15]) << 18);
x18 = ((uint64_t)(arg1[14]) << 10);
x19 = ((uint64_t)(arg1[13]) << 2);
x20 = ((uint64_t)(arg1[12]) << 45);
x21 = ((uint64_t)(arg1[11]) << 37);
x22 = ((uint64_t)(arg1[10]) << 29);
x23 = ((uint64_t)(arg1[9]) << 21);
x24 = ((uint64_t)(arg1[8]) << 13);
x25 = ((uint64_t)(arg1[7]) << 5);
x26 = ((uint64_t)(arg1[6]) << 48);
x27 = ((uint64_t)(arg1[5]) << 40);
x28 = ((uint64_t)(arg1[4]) << 32);
x29 = ((uint64_t)(arg1[3]) << 24);
x30 = ((uint64_t)(arg1[2]) << 16);
x31 = ((uint64_t)(arg1[1]) << 8);
x32 = (arg1[0]);
x33 = (x31 + (uint64_t)x32);
x34 = (x30 + x33);
x35 = (x29 + x34);
x36 = (x28 + x35);
x37 = (x27 + x36);
x38 = (x26 + x37);
x39 = (x38 & UINT64_C(0x7ffffffffffff));
x40 = (uint8_t)(x38 >> 51);
x41 = (x25 + (uint64_t)x40);
x42 = (x24 + x41);
x43 = (x23 + x42);
x44 = (x22 + x43);
x45 = (x21 + x44);
x46 = (x20 + x45);
x47 = (x46 & UINT64_C(0x7ffffffffffff));
x48 = (uint8_t)(x46 >> 51);
x49 = (x19 + (uint64_t)x48);
x50 = (x18 + x49);
x51 = (x17 + x50);
x52 = (x16 + x51);
x53 = (x15 + x52);
x54 = (x14 + x53);
x55 = (x13 + x54);
x56 = (x55 & UINT64_C(0x7ffffffffffff));
x57 = (uint8_t)(x55 >> 51);
x58 = (x12 + (uint64_t)x57);
x59 = (x11 + x58);
x60 = (x10 + x59);
x61 = (x9 + x60);
x62 = (x8 + x61);
x63 = (x7 + x62);
x64 = (x63 & UINT64_C(0x7ffffffffffff));
x65 = (uint8_t)(x63 >> 51);
x66 = (x6 + (uint64_t)x65);
x67 = (x5 + x66);
x68 = (x4 + x67);
x69 = (x3 + x68);
x70 = (x2 + x69);
x71 = (x1 + x70);
out1[0] = x39;
out1[1] = x47;
out1[2] = x56;
out1[3] = x64;
out1[4] = x71;
}
/*
* The function fiat_25519_carry_scmul_121666 multiplies a field element by 121666 and reduces the result.
*
* Postconditions:
* eval out1 mod m = (121666 * eval arg1) mod m
*
*/
static FIAT_25519_FIAT_INLINE void fiat_25519_carry_scmul_121666(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1) {
fiat_25519_uint128 x1;
fiat_25519_uint128 x2;
fiat_25519_uint128 x3;
fiat_25519_uint128 x4;
fiat_25519_uint128 x5;
uint64_t x6;
uint64_t x7;
fiat_25519_uint128 x8;
uint64_t x9;
uint64_t x10;
fiat_25519_uint128 x11;
uint64_t x12;
uint64_t x13;
fiat_25519_uint128 x14;
uint64_t x15;
uint64_t x16;
fiat_25519_uint128 x17;
uint64_t x18;
uint64_t x19;
uint64_t x20;
uint64_t x21;
fiat_25519_uint1 x22;
uint64_t x23;
uint64_t x24;
fiat_25519_uint1 x25;
uint64_t x26;
uint64_t x27;
x1 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[4]));
x2 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[3]));
x3 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[2]));
x4 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[1]));
x5 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[0]));
x6 = (uint64_t)(x5 >> 51);
x7 = (uint64_t)(x5 & UINT64_C(0x7ffffffffffff));
x8 = (x6 + x4);
x9 = (uint64_t)(x8 >> 51);
x10 = (uint64_t)(x8 & UINT64_C(0x7ffffffffffff));
x11 = (x9 + x3);
x12 = (uint64_t)(x11 >> 51);
x13 = (uint64_t)(x11 & UINT64_C(0x7ffffffffffff));
x14 = (x12 + x2);
x15 = (uint64_t)(x14 >> 51);
x16 = (uint64_t)(x14 & UINT64_C(0x7ffffffffffff));
x17 = (x15 + x1);
x18 = (uint64_t)(x17 >> 51);
x19 = (uint64_t)(x17 & UINT64_C(0x7ffffffffffff));
x20 = (x18 * UINT8_C(0x13));
x21 = (x7 + x20);
x22 = (fiat_25519_uint1)(x21 >> 51);
x23 = (x21 & UINT64_C(0x7ffffffffffff));
x24 = (x22 + x10);
x25 = (fiat_25519_uint1)(x24 >> 51);
x26 = (x24 & UINT64_C(0x7ffffffffffff));
x27 = (x25 + x13);
out1[0] = x23;
out1[1] = x26;
out1[2] = x27;
out1[3] = x16;
out1[4] = x19;
}

View File

@@ -0,0 +1,695 @@
#include <ring-core/base.h>
#include "../../crypto/internal.h"
#include <stdbool.h>
#include <stdint.h>
#include <immintrin.h>
typedef uint64_t fe4[4];
typedef uint8_t fiat_uint1;
typedef int8_t fiat_int1;
static __inline__ uint64_t fiat_value_barrier_u64(uint64_t a) {
__asm__("" : "+r"(a) : /* no inputs */);
return a;
}
__attribute__((target("adx,bmi2")))
static inline void fe4_mul(fe4 out, const fe4 x, const fe4 y) { fiat_curve25519_adx_mul(out, x, y); }
__attribute__((target("adx,bmi2")))
static inline void fe4_sq(fe4 out, const fe4 x) { fiat_curve25519_adx_square(out, x); }
/*
* The function fiat_mulx_u64 is a multiplication, returning the full double-width result.
*
* Postconditions:
* out1 = (arg1 * arg2) mod 2^64
* out2 = ⌊arg1 * arg2 / 2^64⌋
*
* Input Bounds:
* arg1: [0x0 ~> 0xffffffffffffffff]
* arg2: [0x0 ~> 0xffffffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
* out2: [0x0 ~> 0xffffffffffffffff]
*/
__attribute__((target("adx,bmi2")))
static inline void fiat_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) {
// NOTE: edited after generation
#if defined(_M_X64)
unsigned long long t;
*out1 = _umul128(arg1, arg2, &t);
*out2 = t;
#elif defined(_M_ARM64)
*out1 = arg1 * arg2;
*out2 = __umulh(arg1, arg2);
#else
unsigned __int128 t = (unsigned __int128)arg1 * arg2;
*out1 = t;
*out2 = (t >> 64);
#endif
}
/*
* The function fiat_addcarryx_u64 is an addition with carry.
*
* Postconditions:
* out1 = (arg1 + arg2 + arg3) mod 2^64
* out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [0x0 ~> 0xffffffffffffffff]
* arg3: [0x0 ~> 0xffffffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
* out2: [0x0 ~> 0x1]
*/
__attribute__((target("adx,bmi2")))
static inline void fiat_addcarryx_u64(uint64_t* out1, fiat_uint1* out2, fiat_uint1 arg1, uint64_t arg2, uint64_t arg3) {
// NOTE: edited after generation
#if defined(__has_builtin)
# if __has_builtin(__builtin_ia32_addcarryx_u64)
# define addcarry64 __builtin_ia32_addcarryx_u64
# endif
#endif
#if defined(addcarry64)
long long unsigned int t;
*out2 = addcarry64(arg1, arg2, arg3, &t);
*out1 = t;
#elif defined(_M_X64)
long long unsigned int t;
*out2 = _addcarry_u64(arg1, arg2, arg3, out1);
*out1 = t;
#else
arg2 += arg1;
arg1 = arg2 < arg1;
uint64_t ret = arg2 + arg3;
arg1 += ret < arg2;
*out1 = ret;
*out2 = arg1;
#endif
#undef addcarry64
}
/*
* The function fiat_subborrowx_u64 is a subtraction with borrow.
*
* Postconditions:
* out1 = (-arg1 + arg2 + -arg3) mod 2^64
* out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [0x0 ~> 0xffffffffffffffff]
* arg3: [0x0 ~> 0xffffffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
* out2: [0x0 ~> 0x1]
*/
__attribute__((target("adx,bmi2")))
static inline void fiat_subborrowx_u64(uint64_t* out1, fiat_uint1* out2, fiat_uint1 arg1, uint64_t arg2, uint64_t arg3) {
#if defined(__has_builtin)
# if __has_builtin(__builtin_ia32_subborrow_u64)
# define subborrow64 __builtin_ia32_subborrow_u64
# endif
#endif
#if defined(subborrow64)
long long unsigned int t;
*out2 = subborrow64(arg1, arg2, arg3, &t);
*out1 = t;
#elif defined(_M_X64)
long long unsigned int t;
*out2 = _subborrow_u64(arg1, arg2, arg3, &t); // NOTE: edited after generation
*out1 = t;
#else
*out1 = arg2 - arg3 - arg1;
*out2 = (arg2 < arg3) | ((arg2 == arg3) & arg1);
#endif
#undef subborrow64
}
/*
* The function fiat_cmovznz_u64 is a single-word conditional move.
*
* Postconditions:
* out1 = (if arg1 = 0 then arg2 else arg3)
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [0x0 ~> 0xffffffffffffffff]
* arg3: [0x0 ~> 0xffffffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
*/
__attribute__((target("adx,bmi2")))
static inline void fiat_cmovznz_u64(uint64_t* out1, fiat_uint1 arg1, uint64_t arg2, uint64_t arg3) {
fiat_uint1 x1;
uint64_t x2;
uint64_t x3;
x1 = (!(!arg1));
x2 = ((fiat_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
x3 = ((fiat_value_barrier_u64(x2) & arg3) | (fiat_value_barrier_u64((~x2)) & arg2));
*out1 = x3;
}
/*
* Input Bounds:
* arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* Output Bounds:
* out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
*/
__attribute__((target("adx,bmi2")))
static void fe4_add(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
uint64_t x1;
fiat_uint1 x2;
uint64_t x3;
fiat_uint1 x4;
uint64_t x5;
fiat_uint1 x6;
uint64_t x7;
fiat_uint1 x8;
uint64_t x9;
uint64_t x10;
fiat_uint1 x11;
uint64_t x12;
fiat_uint1 x13;
uint64_t x14;
fiat_uint1 x15;
uint64_t x16;
fiat_uint1 x17;
uint64_t x18;
uint64_t x19;
fiat_uint1 x20;
fiat_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
fiat_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
fiat_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
fiat_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
fiat_cmovznz_u64(&x9, x8, 0x0, UINT8_C(0x26)); // NOTE: clang 14 for Zen 2 uses sbb, and
fiat_addcarryx_u64(&x10, &x11, 0x0, x1, x9);
fiat_addcarryx_u64(&x12, &x13, x11, x3, 0x0);
fiat_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
fiat_addcarryx_u64(&x16, &x17, x15, x7, 0x0);
fiat_cmovznz_u64(&x18, x17, 0x0, UINT8_C(0x26)); // NOTE: clang 14 for Zen 2 uses sbb, and
fiat_addcarryx_u64(&x19, &x20, 0x0, x10, x18);
out1[0] = x19;
out1[1] = x12;
out1[2] = x14;
out1[3] = x16;
}
/*
* Input Bounds:
* arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* Output Bounds:
* out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
*/
__attribute__((target("adx,bmi2")))
static void fe4_sub(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
uint64_t x1;
uint64_t x2;
fiat_uint1 x3;
uint64_t x4;
uint64_t x5;
fiat_uint1 x6;
uint64_t x7;
uint64_t x8;
fiat_uint1 x9;
uint64_t x10;
uint64_t x11;
fiat_uint1 x12;
uint64_t x13;
uint64_t x14;
fiat_uint1 x15;
uint64_t x16;
fiat_uint1 x17;
uint64_t x18;
fiat_uint1 x19;
uint64_t x20;
fiat_uint1 x21;
uint64_t x22;
uint64_t x23;
fiat_uint1 x24;
x1 = (arg2[0]);
fiat_subborrowx_u64(&x2, &x3, 0x0, (arg1[0]), x1);
x4 = (arg2[1]);
fiat_subborrowx_u64(&x5, &x6, x3, (arg1[1]), x4);
x7 = (arg2[2]);
fiat_subborrowx_u64(&x8, &x9, x6, (arg1[2]), x7);
x10 = (arg2[3]);
fiat_subborrowx_u64(&x11, &x12, x9, (arg1[3]), x10);
fiat_cmovznz_u64(&x13, x12, 0x0, UINT8_C(0x26)); // NOTE: clang 14 for Zen 2 uses sbb, and
fiat_subborrowx_u64(&x14, &x15, 0x0, x2, x13);
fiat_subborrowx_u64(&x16, &x17, x15, x5, 0x0);
fiat_subborrowx_u64(&x18, &x19, x17, x8, 0x0);
fiat_subborrowx_u64(&x20, &x21, x19, x11, 0x0);
fiat_cmovznz_u64(&x22, x21, 0x0, UINT8_C(0x26)); // NOTE: clang 14 for Zen 2 uses sbb, and
fiat_subborrowx_u64(&x23, &x24, 0x0, x14, x22);
out1[0] = x23;
out1[1] = x16;
out1[2] = x18;
out1[3] = x20;
}
/*
* Input Bounds:
* arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* arg2: [0x0 ~> 0x3ffffffffffffff] // NOTE: this is not any uint64!
* Output Bounds:
* out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
*/
__attribute__((target("adx,bmi2")))
static void fe4_scmul(uint64_t out1[4], const uint64_t arg1[4], uint64_t arg2) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
fiat_uint1 x6;
uint64_t x7;
uint64_t x8;
uint64_t x9;
fiat_uint1 x10;
uint64_t x11;
uint64_t x12;
uint64_t x13;
fiat_uint1 x14;
uint64_t x15;
uint64_t x16;
uint64_t x17;
fiat_uint1 x18;
uint64_t x19;
fiat_uint1 x20;
uint64_t x21;
fiat_uint1 x22;
uint64_t x23;
fiat_uint1 x24;
uint64_t x25;
uint64_t x26;
fiat_uint1 x27;
fiat_mulx_u64(&x1, &x2, (arg1[0]), arg2);
fiat_mulx_u64(&x3, &x4, (arg1[1]), arg2);
fiat_addcarryx_u64(&x5, &x6, 0x0, x2, x3);
fiat_mulx_u64(&x7, &x8, (arg1[2]), arg2);
fiat_addcarryx_u64(&x9, &x10, x6, x4, x7);
fiat_mulx_u64(&x11, &x12, (arg1[3]), arg2);
fiat_addcarryx_u64(&x13, &x14, x10, x8, x11);
fiat_mulx_u64(&x15, &x16, (x12 + (uint64_t)x14), UINT8_C(0x26));
fiat_addcarryx_u64(&x17, &x18, 0x0, x1, x15);
fiat_addcarryx_u64(&x19, &x20, x18, x5, 0x0);
fiat_addcarryx_u64(&x21, &x22, x20, x9, 0x0);
fiat_addcarryx_u64(&x23, &x24, x22, x13, 0x0);
fiat_cmovznz_u64(&x25, x24, 0x0, UINT8_C(0x26)); // NOTE: clang 14 for Zen 2 uses sbb, and
fiat_addcarryx_u64(&x26, &x27, 0x0, x17, x25);
out1[0] = x26;
out1[1] = x19;
out1[2] = x21;
out1[3] = x23;
}
/*
* Input Bounds:
* arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* Output Bounds:
* out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
*/
__attribute__((target("adx,bmi2")))
static void fe4_canon(uint64_t out1[4], const uint64_t arg1[4]) {
uint64_t x1;
fiat_uint1 x2;
uint64_t x3;
fiat_uint1 x4;
uint64_t x5;
fiat_uint1 x6;
uint64_t x7;
fiat_uint1 x8;
uint64_t x9;
uint64_t x10;
uint64_t x11;
uint64_t x12;
uint64_t x13;
fiat_uint1 x14;
uint64_t x15;
fiat_uint1 x16;
uint64_t x17;
fiat_uint1 x18;
uint64_t x19;
fiat_uint1 x20;
uint64_t x21;
uint64_t x22;
uint64_t x23;
uint64_t x24;
fiat_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), UINT64_C(0xffffffffffffffed));
fiat_subborrowx_u64(&x3, &x4, x2, (arg1[1]), UINT64_C(0xffffffffffffffff));
fiat_subborrowx_u64(&x5, &x6, x4, (arg1[2]), UINT64_C(0xffffffffffffffff));
fiat_subborrowx_u64(&x7, &x8, x6, (arg1[3]), UINT64_C(0x7fffffffffffffff));
fiat_cmovznz_u64(&x9, x8, x1, (arg1[0]));
fiat_cmovznz_u64(&x10, x8, x3, (arg1[1]));
fiat_cmovznz_u64(&x11, x8, x5, (arg1[2]));
fiat_cmovznz_u64(&x12, x8, x7, (arg1[3]));
fiat_subborrowx_u64(&x13, &x14, 0x0, x9, UINT64_C(0xffffffffffffffed));
fiat_subborrowx_u64(&x15, &x16, x14, x10, UINT64_C(0xffffffffffffffff));
fiat_subborrowx_u64(&x17, &x18, x16, x11, UINT64_C(0xffffffffffffffff));
fiat_subborrowx_u64(&x19, &x20, x18, x12, UINT64_C(0x7fffffffffffffff));
fiat_cmovznz_u64(&x21, x20, x13, x9);
fiat_cmovznz_u64(&x22, x20, x15, x10);
fiat_cmovznz_u64(&x23, x20, x17, x11);
fiat_cmovznz_u64(&x24, x20, x19, x12);
out1[0] = x21;
out1[1] = x22;
out1[2] = x23;
out1[3] = x24;
}
/*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* Output Bounds:
* out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* out2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
*/
__attribute__((target("adx,bmi2")))
static void fe4_cswap(uint64_t out1[4], uint64_t out2[4], fiat_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
uint64_t x6;
uint64_t x7;
uint64_t x8;
// NOTE: clang 14 for Zen 2 uses YMM registers
fiat_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
fiat_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
fiat_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
fiat_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
fiat_cmovznz_u64(&x5, arg1, (arg3[0]), (arg2[0]));
fiat_cmovznz_u64(&x6, arg1, (arg3[1]), (arg2[1]));
fiat_cmovznz_u64(&x7, arg1, (arg3[2]), (arg2[2]));
fiat_cmovznz_u64(&x8, arg1, (arg3[3]), (arg2[3]));
out1[0] = x1;
out1[1] = x2;
out1[2] = x3;
out1[3] = x4;
out2[0] = x5;
out2[1] = x6;
out2[2] = x7;
out2[3] = x8;
}
// The following functions are adaped from crypto/curve25519/curve25519.c
// It would be desirable to share the code, but with the current field
// implementations both 4-limb and 5-limb versions of the curve-level code need
// to be included in builds targetting an unknown variant of x86_64.
__attribute__((target("adx,bmi2")))
static void fe4_invert(fe4 out, const fe4 z) {
fe4 t0;
fe4 t1;
fe4 t2;
fe4 t3;
int i;
fe4_sq(t0, z);
fe4_sq(t1, t0);
for (i = 1; i < 2; ++i) {
fe4_sq(t1, t1);
}
fe4_mul(t1, z, t1);
fe4_mul(t0, t0, t1);
fe4_sq(t2, t0);
fe4_mul(t1, t1, t2);
fe4_sq(t2, t1);
for (i = 1; i < 5; ++i) {
fe4_sq(t2, t2);
}
fe4_mul(t1, t2, t1);
fe4_sq(t2, t1);
for (i = 1; i < 10; ++i) {
fe4_sq(t2, t2);
}
fe4_mul(t2, t2, t1);
fe4_sq(t3, t2);
for (i = 1; i < 20; ++i) {
fe4_sq(t3, t3);
}
fe4_mul(t2, t3, t2);
fe4_sq(t2, t2);
for (i = 1; i < 10; ++i) {
fe4_sq(t2, t2);
}
fe4_mul(t1, t2, t1);
fe4_sq(t2, t1);
for (i = 1; i < 50; ++i) {
fe4_sq(t2, t2);
}
fe4_mul(t2, t2, t1);
fe4_sq(t3, t2);
for (i = 1; i < 100; ++i) {
fe4_sq(t3, t3);
}
fe4_mul(t2, t3, t2);
fe4_sq(t2, t2);
for (i = 1; i < 50; ++i) {
fe4_sq(t2, t2);
}
fe4_mul(t1, t2, t1);
fe4_sq(t1, t1);
for (i = 1; i < 5; ++i) {
fe4_sq(t1, t1);
}
fe4_mul(out, t1, t0);
}
RING_NOINLINE // https://github.com/rust-lang/rust/issues/116573
__attribute__((target("adx,bmi2")))
void x25519_scalar_mult_adx(uint8_t out[32], const uint8_t scalar[32],
const uint8_t point[32]) {
uint8_t e[32];
OPENSSL_memcpy(e, scalar, 32);
e[0] &= 248;
e[31] &= 127;
e[31] |= 64;
// The following implementation was transcribed to Coq and proven to
// correspond to unary scalar multiplication in affine coordinates given that
// x1 != 0 is the x coordinate of some point on the curve. It was also checked
// in Coq that doing a ladderstep with x1 = x3 = 0 gives z2' = z3' = 0, and z2
// = z3 = 0 gives z2' = z3' = 0. The statement was quantified over the
// underlying field, so it applies to Curve25519 itself and the quadratic
// twist of Curve25519. It was not proven in Coq that prime-field arithmetic
// correctly simulates extension-field arithmetic on prime-field values.
// The decoding of the byte array representation of e was not considered.
// Specification of Montgomery curves in affine coordinates:
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
// Proof that these form a group that is isomorphic to a Weierstrass curve:
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
// Coq transcription and correctness proof of the loop (where scalarbits=255):
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
// preconditions: 0 <= e < 2^255 (not necessarily e < order), fe_invert(0) = 0
fe4 x1, x2 = {1}, z2 = {0}, x3, z3 = {1}, tmp0, tmp1;
OPENSSL_memcpy(x1, point, sizeof(fe4));
x1[3] &= (uint64_t)(-1)>>1;
OPENSSL_memcpy(x3, x1, sizeof(fe4));
unsigned swap = 0;
int pos;
for (pos = 254; pos >= 0; --pos) {
// loop invariant as of right before the test, for the case where x1 != 0:
// pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3 is nonzero
// let r := e >> (pos+1) in the following equalities of projective points:
// to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
// to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
// x1 is the nonzero x coordinate of the nonzero point (r*P-(r+1)*P)
unsigned b = 1 & (e[pos / 8] >> (pos & 7));
swap ^= b;
fe4_cswap(x2, x3, swap, x2, x3);
fe4_cswap(z2, z3, swap, z2, z3);
swap = b;
// Coq transcription of ladderstep formula (called from transcribed loop):
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
// x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
// x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
fe4_sub(tmp0, x3, z3);
fe4_sub(tmp1, x2, z2);
fe4_add(x2, x2, z2);
fe4_add(z2, x3, z3);
fe4_mul(z3, tmp0, x2);
fe4_mul(z2, z2, tmp1);
fe4_sq(tmp0, tmp1);
fe4_sq(tmp1, x2);
fe4_add(x3, z3, z2);
fe4_sub(z2, z3, z2);
fe4_mul(x2, tmp1, tmp0);
fe4_sub(tmp1, tmp1, tmp0);
fe4_sq(z2, z2);
fe4_scmul(z3, tmp1, 121666);
fe4_sq(x3, x3);
fe4_add(tmp0, tmp0, z3);
fe4_mul(z3, x1, z2);
fe4_mul(z2, tmp1, tmp0);
}
// here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3) else (x2, z2)
fe4_cswap(x2, x3, swap, x2, x3);
fe4_cswap(z2, z3, swap, z2, z3);
fe4_invert(z2, z2);
fe4_mul(x2, x2, z2);
fe4_canon(x2, x2);
OPENSSL_memcpy(out, x2, sizeof(fe4));
}
typedef struct {
fe4 X;
fe4 Y;
fe4 Z;
fe4 T;
} ge_p3_4;
typedef struct {
fe4 yplusx;
fe4 yminusx;
fe4 xy2d;
} ge_precomp_4;
__attribute__((target("adx,bmi2")))
static void inline_x25519_ge_dbl_4(ge_p3_4 *r, const ge_p3_4 *p, bool skip_t) {
// Transcribed from a Coq function proven against affine coordinates.
// https://github.com/mit-plv/fiat-crypto/blob/9943ba9e7d8f3e1c0054b2c94a5edca46ea73ef8/src/Curves/Edwards/XYZT/Basic.v#L136-L165
fe4 trX, trZ, trT, t0, cX, cY, cZ, cT;
fe4_sq(trX, p->X);
fe4_sq(trZ, p->Y);
fe4_sq(trT, p->Z);
fe4_add(trT, trT, trT);
fe4_add(cY, p->X, p->Y);
fe4_sq(t0, cY);
fe4_add(cY, trZ, trX);
fe4_sub(cZ, trZ, trX);
fe4_sub(cX, t0, cY);
fe4_sub(cT, trT, cZ);
fe4_mul(r->X, cX, cT);
fe4_mul(r->Y, cY, cZ);
fe4_mul(r->Z, cZ, cT);
if (!skip_t) {
fe4_mul(r->T, cX, cY);
}
}
__attribute__((target("adx,bmi2")))
__attribute__((always_inline)) // 4% speedup with clang14 and zen2
static inline void
ge_p3_add_p3_precomp_4(ge_p3_4 *r, const ge_p3_4 *p, const ge_precomp_4 *q) {
fe4 A, B, C, YplusX, YminusX, D, X3, Y3, Z3, T3;
// Transcribed from a Coq function proven against affine coordinates.
// https://github.com/mit-plv/fiat-crypto/blob/a36568d1d73aff5d7accc79fd28be672882f9c17/src/Curves/Edwards/XYZT/Precomputed.v#L38-L56
fe4_add(YplusX, p->Y, p->X);
fe4_sub(YminusX, p->Y, p->X);
fe4_mul(A, YplusX, q->yplusx);
fe4_mul(B, YminusX, q->yminusx);
fe4_mul(C, q->xy2d, p->T);
fe4_add(D, p->Z, p->Z);
fe4_sub(X3, A, B);
fe4_add(Y3, A, B);
fe4_add(Z3, D, C);
fe4_sub(T3, D, C);
fe4_mul(r->X, X3, T3);
fe4_mul(r->Y, Y3, Z3);
fe4_mul(r->Z, Z3, T3);
fe4_mul(r->T, X3, Y3);
}
__attribute__((always_inline)) // 25% speedup with clang14 and zen2
static inline void table_select_4(ge_precomp_4 *t, const int pos,
const signed char b) {
uint8_t bnegative = constant_time_msb_w(b);
uint8_t babs = b - ((bnegative & b) << 1);
uint8_t t_bytes[3][32] = {
{constant_time_is_zero_w(b) & 1}, {constant_time_is_zero_w(b) & 1}, {0}};
#if defined(__clang__)
__asm__("" : "+m" (t_bytes) : /*no inputs*/);
#endif
OPENSSL_STATIC_ASSERT(sizeof(t_bytes) == sizeof(k25519Precomp[pos][0]), "");
for (int i = 0; i < 8; i++) {
constant_time_conditional_memxor(t_bytes, k25519Precomp[pos][i],
sizeof(t_bytes),
constant_time_eq_w(babs, 1 + i));
}
OPENSSL_STATIC_ASSERT(sizeof(t_bytes) == sizeof(ge_precomp_4), "");
// fe4 uses saturated 64-bit limbs, so converting from bytes is just a copy.
OPENSSL_memcpy(t, t_bytes, sizeof(ge_precomp_4));
fe4 xy2d_neg = {0};
fe4_sub(xy2d_neg, xy2d_neg, t->xy2d);
constant_time_conditional_memcpy(t->yplusx, t_bytes[1], sizeof(fe4),
bnegative);
constant_time_conditional_memcpy(t->yminusx, t_bytes[0], sizeof(fe4),
bnegative);
constant_time_conditional_memcpy(t->xy2d, xy2d_neg, sizeof(fe4), bnegative);
}
// h = a * B
// where a = a[0]+256*a[1]+...+256^31 a[31]
// B is the Ed25519 base point (x,4/5) with x positive.
//
// Preconditions:
// a[31] <= 127
RING_NOINLINE // https://github.com/rust-lang/rust/issues/116573
__attribute__((target("adx,bmi2")))
void x25519_ge_scalarmult_base_adx(uint8_t h[4][32], const uint8_t a[32]) {
signed char e[64];
signed char carry;
for (unsigned i = 0; i < 32; ++i) {
e[2 * i + 0] = (a[i] >> 0) & 15;
e[2 * i + 1] = (a[i] >> 4) & 15;
}
// each e[i] is between 0 and 15
// e[63] is between 0 and 7
carry = 0;
for (unsigned i = 0; i < 63; ++i) {
e[i] += carry;
carry = e[i] + 8;
carry >>= 4;
e[i] -= carry << 4;
}
e[63] += carry;
// each e[i] is between -8 and 8
ge_p3_4 r = {{0}, {1}, {1}, {0}};
for (unsigned i = 1; i < 64; i += 2) {
ge_precomp_4 t;
table_select_4(&t, i / 2, e[i]);
ge_p3_add_p3_precomp_4(&r, &r, &t);
}
inline_x25519_ge_dbl_4(&r, &r, /*skip_t=*/true);
inline_x25519_ge_dbl_4(&r, &r, /*skip_t=*/true);
inline_x25519_ge_dbl_4(&r, &r, /*skip_t=*/true);
inline_x25519_ge_dbl_4(&r, &r, /*skip_t=*/false);
for (unsigned i = 0; i < 64; i += 2) {
ge_precomp_4 t;
table_select_4(&t, i / 2, e[i]);
ge_p3_add_p3_precomp_4(&r, &r, &t);
}
// fe4 uses saturated 64-bit limbs, so converting to bytes is just a copy.
// Satisfy stated precondition of fiat_25519_from_bytes; tests pass either way
fe4_canon(r.X, r.X);
fe4_canon(r.Y, r.Y);
fe4_canon(r.Z, r.Z);
fe4_canon(r.T, r.T);
OPENSSL_STATIC_ASSERT(sizeof(ge_p3_4) == sizeof(uint8_t[4][32]), "");
OPENSSL_memcpy(h, &r, sizeof(ge_p3_4));
}

File diff suppressed because it is too large Load Diff

2511
vendor/ring/third_party/fiat/p256_32.h vendored Normal file

File diff suppressed because it is too large Load Diff

957
vendor/ring/third_party/fiat/p256_64.h vendored Normal file
View File

@@ -0,0 +1,957 @@
/* Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --inline --static --use-value-barrier p256 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp */
/* curve description: p256 */
/* machine_wordsize = 64 (from "64") */
/* requested operations: mul, square, add, sub, opp, from_montgomery, to_montgomery, nonzero, selectznz, to_bytes, from_bytes, one, msat, divstep, divstep_precomp */
/* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
/* */
/* NOTE: In addition to the bounds specified above each function, all */
/* functions synthesized for this Montgomery arithmetic require the */
/* input to be strictly less than the prime modulus (m), and also */
/* require the input to be in the unique saturated representation. */
/* All functions also ensure that these two properties are true of */
/* return values. */
/* */
/* Computed values: */
/* eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) */
/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */
/* twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in */
/* if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256 */
#include <stdint.h>
typedef unsigned char fiat_p256_uint1;
typedef signed char fiat_p256_int1;
#if defined(__GNUC__) || defined(__clang__)
# define FIAT_P256_FIAT_EXTENSION __extension__
# define FIAT_P256_FIAT_INLINE __inline__
#else
# define FIAT_P256_FIAT_EXTENSION
# define FIAT_P256_FIAT_INLINE
#endif
FIAT_P256_FIAT_EXTENSION typedef signed __int128 fiat_p256_int128;
FIAT_P256_FIAT_EXTENSION typedef unsigned __int128 fiat_p256_uint128;
/* The type fiat_p256_montgomery_domain_field_element is a field element in the Montgomery domain. */
/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */
typedef uint64_t fiat_p256_montgomery_domain_field_element[4];
/* The type fiat_p256_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */
/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */
typedef uint64_t fiat_p256_non_montgomery_domain_field_element[4];
#if (-1 & 3) != 3
#error "This code only works on a two's complement system"
#endif
#if !defined(FIAT_P256_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
static __inline__ uint64_t fiat_p256_value_barrier_u64(uint64_t a) {
__asm__("" : "+r"(a) : /* no inputs */);
return a;
}
#else
# define fiat_p256_value_barrier_u64(x) (x)
#endif
/*
* The function fiat_p256_addcarryx_u64 is an addition with carry.
*
* Postconditions:
* out1 = (arg1 + arg2 + arg3) mod 2^64
* out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [0x0 ~> 0xffffffffffffffff]
* arg3: [0x0 ~> 0xffffffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
* out2: [0x0 ~> 0x1]
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
fiat_p256_uint128 x1;
uint64_t x2;
fiat_p256_uint1 x3;
x1 = ((arg1 + (fiat_p256_uint128)arg2) + arg3);
x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
x3 = (fiat_p256_uint1)(x1 >> 64);
*out1 = x2;
*out2 = x3;
}
/*
* The function fiat_p256_subborrowx_u64 is a subtraction with borrow.
*
* Postconditions:
* out1 = (-arg1 + arg2 + -arg3) mod 2^64
* out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [0x0 ~> 0xffffffffffffffff]
* arg3: [0x0 ~> 0xffffffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
* out2: [0x0 ~> 0x1]
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
fiat_p256_int128 x1;
fiat_p256_int1 x2;
uint64_t x3;
x1 = ((arg2 - (fiat_p256_int128)arg1) - arg3);
x2 = (fiat_p256_int1)(x1 >> 64);
x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
*out1 = x3;
*out2 = (fiat_p256_uint1)(0x0 - x2);
}
/*
* The function fiat_p256_mulx_u64 is a multiplication, returning the full double-width result.
*
* Postconditions:
* out1 = (arg1 * arg2) mod 2^64
* out2 = ⌊arg1 * arg2 / 2^64⌋
*
* Input Bounds:
* arg1: [0x0 ~> 0xffffffffffffffff]
* arg2: [0x0 ~> 0xffffffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
* out2: [0x0 ~> 0xffffffffffffffff]
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) {
fiat_p256_uint128 x1;
uint64_t x2;
uint64_t x3;
x1 = ((fiat_p256_uint128)arg1 * arg2);
x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
x3 = (uint64_t)(x1 >> 64);
*out1 = x2;
*out2 = x3;
}
/*
* The function fiat_p256_cmovznz_u64 is a single-word conditional move.
*
* Postconditions:
* out1 = (if arg1 = 0 then arg2 else arg3)
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [0x0 ~> 0xffffffffffffffff]
* arg3: [0x0 ~> 0xffffffffffffffff]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
fiat_p256_uint1 x1;
uint64_t x2;
uint64_t x3;
x1 = (!(!arg1));
x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
x3 = ((fiat_p256_value_barrier_u64(x2) & arg3) | (fiat_p256_value_barrier_u64((~x2)) & arg2));
*out1 = x3;
}
/*
* The function fiat_p256_mul multiplies two field elements in the Montgomery domain.
*
* Preconditions:
* 0 ≤ eval arg1 < m
* 0 ≤ eval arg2 < m
* Postconditions:
* eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
* 0 ≤ eval out1 < m
*
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_mul(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
uint64_t x6;
uint64_t x7;
uint64_t x8;
uint64_t x9;
uint64_t x10;
uint64_t x11;
uint64_t x12;
uint64_t x13;
fiat_p256_uint1 x14;
uint64_t x15;
fiat_p256_uint1 x16;
uint64_t x17;
fiat_p256_uint1 x18;
uint64_t x19;
uint64_t x20;
uint64_t x21;
uint64_t x22;
uint64_t x23;
uint64_t x24;
uint64_t x25;
uint64_t x26;
fiat_p256_uint1 x27;
uint64_t x28;
uint64_t x29;
fiat_p256_uint1 x30;
uint64_t x31;
fiat_p256_uint1 x32;
uint64_t x33;
fiat_p256_uint1 x34;
uint64_t x35;
fiat_p256_uint1 x36;
uint64_t x37;
fiat_p256_uint1 x38;
uint64_t x39;
uint64_t x40;
uint64_t x41;
uint64_t x42;
uint64_t x43;
uint64_t x44;
uint64_t x45;
uint64_t x46;
uint64_t x47;
fiat_p256_uint1 x48;
uint64_t x49;
fiat_p256_uint1 x50;
uint64_t x51;
fiat_p256_uint1 x52;
uint64_t x53;
uint64_t x54;
fiat_p256_uint1 x55;
uint64_t x56;
fiat_p256_uint1 x57;
uint64_t x58;
fiat_p256_uint1 x59;
uint64_t x60;
fiat_p256_uint1 x61;
uint64_t x62;
fiat_p256_uint1 x63;
uint64_t x64;
uint64_t x65;
uint64_t x66;
uint64_t x67;
uint64_t x68;
uint64_t x69;
uint64_t x70;
fiat_p256_uint1 x71;
uint64_t x72;
uint64_t x73;
fiat_p256_uint1 x74;
uint64_t x75;
fiat_p256_uint1 x76;
uint64_t x77;
fiat_p256_uint1 x78;
uint64_t x79;
fiat_p256_uint1 x80;
uint64_t x81;
fiat_p256_uint1 x82;
uint64_t x83;
uint64_t x84;
uint64_t x85;
uint64_t x86;
uint64_t x87;
uint64_t x88;
uint64_t x89;
uint64_t x90;
uint64_t x91;
uint64_t x92;
fiat_p256_uint1 x93;
uint64_t x94;
fiat_p256_uint1 x95;
uint64_t x96;
fiat_p256_uint1 x97;
uint64_t x98;
uint64_t x99;
fiat_p256_uint1 x100;
uint64_t x101;
fiat_p256_uint1 x102;
uint64_t x103;
fiat_p256_uint1 x104;
uint64_t x105;
fiat_p256_uint1 x106;
uint64_t x107;
fiat_p256_uint1 x108;
uint64_t x109;
uint64_t x110;
uint64_t x111;
uint64_t x112;
uint64_t x113;
uint64_t x114;
uint64_t x115;
fiat_p256_uint1 x116;
uint64_t x117;
uint64_t x118;
fiat_p256_uint1 x119;
uint64_t x120;
fiat_p256_uint1 x121;
uint64_t x122;
fiat_p256_uint1 x123;
uint64_t x124;
fiat_p256_uint1 x125;
uint64_t x126;
fiat_p256_uint1 x127;
uint64_t x128;
uint64_t x129;
uint64_t x130;
uint64_t x131;
uint64_t x132;
uint64_t x133;
uint64_t x134;
uint64_t x135;
uint64_t x136;
uint64_t x137;
fiat_p256_uint1 x138;
uint64_t x139;
fiat_p256_uint1 x140;
uint64_t x141;
fiat_p256_uint1 x142;
uint64_t x143;
uint64_t x144;
fiat_p256_uint1 x145;
uint64_t x146;
fiat_p256_uint1 x147;
uint64_t x148;
fiat_p256_uint1 x149;
uint64_t x150;
fiat_p256_uint1 x151;
uint64_t x152;
fiat_p256_uint1 x153;
uint64_t x154;
uint64_t x155;
uint64_t x156;
uint64_t x157;
uint64_t x158;
uint64_t x159;
uint64_t x160;
fiat_p256_uint1 x161;
uint64_t x162;
uint64_t x163;
fiat_p256_uint1 x164;
uint64_t x165;
fiat_p256_uint1 x166;
uint64_t x167;
fiat_p256_uint1 x168;
uint64_t x169;
fiat_p256_uint1 x170;
uint64_t x171;
fiat_p256_uint1 x172;
uint64_t x173;
uint64_t x174;
fiat_p256_uint1 x175;
uint64_t x176;
fiat_p256_uint1 x177;
uint64_t x178;
fiat_p256_uint1 x179;
uint64_t x180;
fiat_p256_uint1 x181;
uint64_t x182;
fiat_p256_uint1 x183;
uint64_t x184;
uint64_t x185;
uint64_t x186;
uint64_t x187;
x1 = (arg1[1]);
x2 = (arg1[2]);
x3 = (arg1[3]);
x4 = (arg1[0]);
fiat_p256_mulx_u64(&x5, &x6, x4, (arg2[3]));
fiat_p256_mulx_u64(&x7, &x8, x4, (arg2[2]));
fiat_p256_mulx_u64(&x9, &x10, x4, (arg2[1]));
fiat_p256_mulx_u64(&x11, &x12, x4, (arg2[0]));
fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
x19 = (x18 + x6);
fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
x28 = (x27 + x23);
fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
fiat_p256_mulx_u64(&x39, &x40, x1, (arg2[3]));
fiat_p256_mulx_u64(&x41, &x42, x1, (arg2[2]));
fiat_p256_mulx_u64(&x43, &x44, x1, (arg2[1]));
fiat_p256_mulx_u64(&x45, &x46, x1, (arg2[0]));
fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
x53 = (x52 + x40);
fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
x72 = (x71 + x67);
fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
x83 = ((uint64_t)x82 + x63);
fiat_p256_mulx_u64(&x84, &x85, x2, (arg2[3]));
fiat_p256_mulx_u64(&x86, &x87, x2, (arg2[2]));
fiat_p256_mulx_u64(&x88, &x89, x2, (arg2[1]));
fiat_p256_mulx_u64(&x90, &x91, x2, (arg2[0]));
fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
x98 = (x97 + x85);
fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
x117 = (x116 + x112);
fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
x128 = ((uint64_t)x127 + x108);
fiat_p256_mulx_u64(&x129, &x130, x3, (arg2[3]));
fiat_p256_mulx_u64(&x131, &x132, x3, (arg2[2]));
fiat_p256_mulx_u64(&x133, &x134, x3, (arg2[1]));
fiat_p256_mulx_u64(&x135, &x136, x3, (arg2[0]));
fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
x143 = (x142 + x130);
fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
x162 = (x161 + x157);
fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
x173 = ((uint64_t)x172 + x153);
fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
out1[0] = x184;
out1[1] = x185;
out1[2] = x186;
out1[3] = x187;
}
/*
* The function fiat_p256_square squares a field element in the Montgomery domain.
*
* Preconditions:
* 0 ≤ eval arg1 < m
* Postconditions:
* eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
* 0 ≤ eval out1 < m
*
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_square(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
uint64_t x5;
uint64_t x6;
uint64_t x7;
uint64_t x8;
uint64_t x9;
uint64_t x10;
uint64_t x11;
uint64_t x12;
uint64_t x13;
fiat_p256_uint1 x14;
uint64_t x15;
fiat_p256_uint1 x16;
uint64_t x17;
fiat_p256_uint1 x18;
uint64_t x19;
uint64_t x20;
uint64_t x21;
uint64_t x22;
uint64_t x23;
uint64_t x24;
uint64_t x25;
uint64_t x26;
fiat_p256_uint1 x27;
uint64_t x28;
uint64_t x29;
fiat_p256_uint1 x30;
uint64_t x31;
fiat_p256_uint1 x32;
uint64_t x33;
fiat_p256_uint1 x34;
uint64_t x35;
fiat_p256_uint1 x36;
uint64_t x37;
fiat_p256_uint1 x38;
uint64_t x39;
uint64_t x40;
uint64_t x41;
uint64_t x42;
uint64_t x43;
uint64_t x44;
uint64_t x45;
uint64_t x46;
uint64_t x47;
fiat_p256_uint1 x48;
uint64_t x49;
fiat_p256_uint1 x50;
uint64_t x51;
fiat_p256_uint1 x52;
uint64_t x53;
uint64_t x54;
fiat_p256_uint1 x55;
uint64_t x56;
fiat_p256_uint1 x57;
uint64_t x58;
fiat_p256_uint1 x59;
uint64_t x60;
fiat_p256_uint1 x61;
uint64_t x62;
fiat_p256_uint1 x63;
uint64_t x64;
uint64_t x65;
uint64_t x66;
uint64_t x67;
uint64_t x68;
uint64_t x69;
uint64_t x70;
fiat_p256_uint1 x71;
uint64_t x72;
uint64_t x73;
fiat_p256_uint1 x74;
uint64_t x75;
fiat_p256_uint1 x76;
uint64_t x77;
fiat_p256_uint1 x78;
uint64_t x79;
fiat_p256_uint1 x80;
uint64_t x81;
fiat_p256_uint1 x82;
uint64_t x83;
uint64_t x84;
uint64_t x85;
uint64_t x86;
uint64_t x87;
uint64_t x88;
uint64_t x89;
uint64_t x90;
uint64_t x91;
uint64_t x92;
fiat_p256_uint1 x93;
uint64_t x94;
fiat_p256_uint1 x95;
uint64_t x96;
fiat_p256_uint1 x97;
uint64_t x98;
uint64_t x99;
fiat_p256_uint1 x100;
uint64_t x101;
fiat_p256_uint1 x102;
uint64_t x103;
fiat_p256_uint1 x104;
uint64_t x105;
fiat_p256_uint1 x106;
uint64_t x107;
fiat_p256_uint1 x108;
uint64_t x109;
uint64_t x110;
uint64_t x111;
uint64_t x112;
uint64_t x113;
uint64_t x114;
uint64_t x115;
fiat_p256_uint1 x116;
uint64_t x117;
uint64_t x118;
fiat_p256_uint1 x119;
uint64_t x120;
fiat_p256_uint1 x121;
uint64_t x122;
fiat_p256_uint1 x123;
uint64_t x124;
fiat_p256_uint1 x125;
uint64_t x126;
fiat_p256_uint1 x127;
uint64_t x128;
uint64_t x129;
uint64_t x130;
uint64_t x131;
uint64_t x132;
uint64_t x133;
uint64_t x134;
uint64_t x135;
uint64_t x136;
uint64_t x137;
fiat_p256_uint1 x138;
uint64_t x139;
fiat_p256_uint1 x140;
uint64_t x141;
fiat_p256_uint1 x142;
uint64_t x143;
uint64_t x144;
fiat_p256_uint1 x145;
uint64_t x146;
fiat_p256_uint1 x147;
uint64_t x148;
fiat_p256_uint1 x149;
uint64_t x150;
fiat_p256_uint1 x151;
uint64_t x152;
fiat_p256_uint1 x153;
uint64_t x154;
uint64_t x155;
uint64_t x156;
uint64_t x157;
uint64_t x158;
uint64_t x159;
uint64_t x160;
fiat_p256_uint1 x161;
uint64_t x162;
uint64_t x163;
fiat_p256_uint1 x164;
uint64_t x165;
fiat_p256_uint1 x166;
uint64_t x167;
fiat_p256_uint1 x168;
uint64_t x169;
fiat_p256_uint1 x170;
uint64_t x171;
fiat_p256_uint1 x172;
uint64_t x173;
uint64_t x174;
fiat_p256_uint1 x175;
uint64_t x176;
fiat_p256_uint1 x177;
uint64_t x178;
fiat_p256_uint1 x179;
uint64_t x180;
fiat_p256_uint1 x181;
uint64_t x182;
fiat_p256_uint1 x183;
uint64_t x184;
uint64_t x185;
uint64_t x186;
uint64_t x187;
x1 = (arg1[1]);
x2 = (arg1[2]);
x3 = (arg1[3]);
x4 = (arg1[0]);
fiat_p256_mulx_u64(&x5, &x6, x4, (arg1[3]));
fiat_p256_mulx_u64(&x7, &x8, x4, (arg1[2]));
fiat_p256_mulx_u64(&x9, &x10, x4, (arg1[1]));
fiat_p256_mulx_u64(&x11, &x12, x4, (arg1[0]));
fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
x19 = (x18 + x6);
fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
x28 = (x27 + x23);
fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
fiat_p256_mulx_u64(&x39, &x40, x1, (arg1[3]));
fiat_p256_mulx_u64(&x41, &x42, x1, (arg1[2]));
fiat_p256_mulx_u64(&x43, &x44, x1, (arg1[1]));
fiat_p256_mulx_u64(&x45, &x46, x1, (arg1[0]));
fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
x53 = (x52 + x40);
fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
x72 = (x71 + x67);
fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
x83 = ((uint64_t)x82 + x63);
fiat_p256_mulx_u64(&x84, &x85, x2, (arg1[3]));
fiat_p256_mulx_u64(&x86, &x87, x2, (arg1[2]));
fiat_p256_mulx_u64(&x88, &x89, x2, (arg1[1]));
fiat_p256_mulx_u64(&x90, &x91, x2, (arg1[0]));
fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
x98 = (x97 + x85);
fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
x117 = (x116 + x112);
fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
x128 = ((uint64_t)x127 + x108);
fiat_p256_mulx_u64(&x129, &x130, x3, (arg1[3]));
fiat_p256_mulx_u64(&x131, &x132, x3, (arg1[2]));
fiat_p256_mulx_u64(&x133, &x134, x3, (arg1[1]));
fiat_p256_mulx_u64(&x135, &x136, x3, (arg1[0]));
fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
x143 = (x142 + x130);
fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
x162 = (x161 + x157);
fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
x173 = ((uint64_t)x172 + x153);
fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
out1[0] = x184;
out1[1] = x185;
out1[2] = x186;
out1[3] = x187;
}
/*
* The function fiat_p256_add adds two field elements in the Montgomery domain.
*
* Preconditions:
* 0 ≤ eval arg1 < m
* 0 ≤ eval arg2 < m
* Postconditions:
* eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
* 0 ≤ eval out1 < m
*
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_add(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
uint64_t x1;
fiat_p256_uint1 x2;
uint64_t x3;
fiat_p256_uint1 x4;
uint64_t x5;
fiat_p256_uint1 x6;
uint64_t x7;
fiat_p256_uint1 x8;
uint64_t x9;
fiat_p256_uint1 x10;
uint64_t x11;
fiat_p256_uint1 x12;
uint64_t x13;
fiat_p256_uint1 x14;
uint64_t x15;
fiat_p256_uint1 x16;
uint64_t x17;
fiat_p256_uint1 x18;
uint64_t x19;
uint64_t x20;
uint64_t x21;
uint64_t x22;
fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
fiat_p256_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
fiat_p256_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
fiat_p256_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
fiat_p256_subborrowx_u64(&x9, &x10, 0x0, x1, UINT64_C(0xffffffffffffffff));
fiat_p256_subborrowx_u64(&x11, &x12, x10, x3, UINT32_C(0xffffffff));
fiat_p256_subborrowx_u64(&x13, &x14, x12, x5, 0x0);
fiat_p256_subborrowx_u64(&x15, &x16, x14, x7, UINT64_C(0xffffffff00000001));
fiat_p256_subborrowx_u64(&x17, &x18, x16, x8, 0x0);
fiat_p256_cmovznz_u64(&x19, x18, x9, x1);
fiat_p256_cmovznz_u64(&x20, x18, x11, x3);
fiat_p256_cmovznz_u64(&x21, x18, x13, x5);
fiat_p256_cmovznz_u64(&x22, x18, x15, x7);
out1[0] = x19;
out1[1] = x20;
out1[2] = x21;
out1[3] = x22;
}
/*
* The function fiat_p256_sub subtracts two field elements in the Montgomery domain.
*
* Preconditions:
* 0 ≤ eval arg1 < m
* 0 ≤ eval arg2 < m
* Postconditions:
* eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
* 0 ≤ eval out1 < m
*
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_sub(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
uint64_t x1;
fiat_p256_uint1 x2;
uint64_t x3;
fiat_p256_uint1 x4;
uint64_t x5;
fiat_p256_uint1 x6;
uint64_t x7;
fiat_p256_uint1 x8;
uint64_t x9;
uint64_t x10;
fiat_p256_uint1 x11;
uint64_t x12;
fiat_p256_uint1 x13;
uint64_t x14;
fiat_p256_uint1 x15;
uint64_t x16;
fiat_p256_uint1 x17;
fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
fiat_p256_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x9);
fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
out1[0] = x10;
out1[1] = x12;
out1[2] = x14;
out1[3] = x16;
}
/*
* The function fiat_p256_opp negates a field element in the Montgomery domain.
*
* Preconditions:
* 0 ≤ eval arg1 < m
* Postconditions:
* eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
* 0 ≤ eval out1 < m
*
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_opp(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
uint64_t x1;
fiat_p256_uint1 x2;
uint64_t x3;
fiat_p256_uint1 x4;
uint64_t x5;
fiat_p256_uint1 x6;
uint64_t x7;
fiat_p256_uint1 x8;
uint64_t x9;
uint64_t x10;
fiat_p256_uint1 x11;
uint64_t x12;
fiat_p256_uint1 x13;
uint64_t x14;
fiat_p256_uint1 x15;
uint64_t x16;
fiat_p256_uint1 x17;
fiat_p256_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0]));
fiat_p256_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1]));
fiat_p256_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2]));
fiat_p256_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3]));
fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x9);
fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
out1[0] = x10;
out1[1] = x12;
out1[2] = x14;
out1[3] = x16;
}
/*
* The function fiat_p256_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
*
* Preconditions:
* 0 ≤ eval arg1 < m
* Postconditions:
* out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
*
* Input Bounds:
* arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* Output Bounds:
* out1: [0x0 ~> 0xffffffffffffffff]
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) {
uint64_t x1;
x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | (arg1[3]))));
*out1 = x1;
}
/*
* The function fiat_p256_selectznz is a multi-limb conditional select.
*
* Postconditions:
* eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
*
* Input Bounds:
* arg1: [0x0 ~> 0x1]
* arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
* Output Bounds:
* out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
*/
static FIAT_P256_FIAT_INLINE void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) {
uint64_t x1;
uint64_t x2;
uint64_t x3;
uint64_t x4;
fiat_p256_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
fiat_p256_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
fiat_p256_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
fiat_p256_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
out1[0] = x1;
out1[1] = x2;
out1[2] = x3;
out1[3] = x4;
}

File diff suppressed because it is too large Load Diff