in FourQ_64bit_and_portable/crypto_util.c [146:213]
void Montgomery_inversion_mod_order(const digit_t* ma, digit_t* mc)
{ // (Non-constant time) Montgomery inversion modulo the curve order using a^(-1) = a^(order-2) mod order
// This function uses the sliding-window method
sdigit_t i = 256;
unsigned int j, nwords = NWORDS_ORDER;
digit_t temp, bit = 0, count, mod2, k_EXPON = 5; // Fixing parameter k to 5 for the sliding windows method
digit_t modulus2[NWORDS_ORDER] = {0}, npoints = 16;
digit_t input_a[NWORDS_ORDER];
digit_t table[16][NWORDS_ORDER]; // Fixing the number of precomputed elements to 16 (assuming k = 5)
digit_t mask = (digit_t)1 << (sizeof(digit_t)*8 - 1); // 0x800...000
digit_t mask2 = ~((digit_t)(-1) >> k_EXPON); // 0xF800...000, assuming k = 5
// SECURITY NOTE: this function does not run in constant time because the modulus is assumed to be public.
modulus2[0] = 2;
subtract((digit_t*)&curve_order, modulus2, modulus2, nwords); // modulus-2
// Precomputation stage
memmove((unsigned char*)&table[0], (unsigned char*)ma, 32); // table[0] = ma
Montgomery_multiply_mod_order(ma, ma, input_a); // ma^2
for (j = 0; j < npoints - 1; j++) {
Montgomery_multiply_mod_order(table[j], input_a, table[j+1]); // table[j+1] = table[j] * ma^2
}
while (bit != 1) { // Shift (modulus-2) to the left until getting first bit 1
i--;
temp = 0;
for (j = 0; j < nwords; j++) {
bit = (modulus2[j] & mask) >> (sizeof(digit_t)*8 - 1);
modulus2[j] = (modulus2[j] << 1) | temp;
temp = bit;
}
}
// Evaluation stage
memmove((unsigned char*)mc, (unsigned char*)ma, 32);
bit = (modulus2[nwords-1] & mask) >> (sizeof(digit_t)*8 - 1);
while (i > 0) {
if (bit == 0) { // Square accumulated value because bit = 0 and shift (modulus-2) one bit to the left
Montgomery_multiply_mod_order(mc, mc, mc); // mc = mc^2
i--;
for (j = (nwords - 1); j > 0; j--) {
SHIFTL(modulus2[j], modulus2[j-1], 1, modulus2[j], RADIX);
}
modulus2[0] = modulus2[0] << 1;
} else { // "temp" will store the longest odd bitstring with "count" bits s.t. temp <= 2^k - 1
count = k_EXPON;
temp = (modulus2[nwords-1] & mask2) >> (sizeof(digit_t)*8 - k_EXPON); // Extracting next k bits to the left
mod2 = temp & 1;
while (mod2 == 0) { // if even then shift to the right and adjust count
temp = (temp >> 1);
mod2 = temp & 1;
count--;
}
for (j = 0; j < count; j++) { // mc = mc^count
Montgomery_multiply_mod_order(mc, mc, mc);
}
Montgomery_multiply_mod_order(mc, table[(temp-1) >> 1], mc); // mc = mc * table[(temp-1)/2]
i = i - count;
for (j = (nwords - 1); j > 0; j--) { // Shift (modulus-2) "count" bits to the left
SHIFTL(modulus2[j], modulus2[j-1], count, modulus2[j], RADIX);
}
modulus2[0] = modulus2[0] << count;
}
bit = (modulus2[nwords - 1] & mask) >> (sizeof(digit_t)*8 - 1);
}
}