in FourQ_64bit_and_portable/table_lookup.h [162:282]
// table_lookup_fixed_base: branch-free selection of one precomputed point, optionally negated.
//
// Every entry table[0..VPOINTS_FIXEDBASE-1] is read on every call; the wanted entry is kept
// by masking rather than by indexing with digit, so the sequence of table accesses is the
// same for every digit value. The sign is likewise applied by masking, not by a branch.
//
// Parameters:
//   table - array of VPOINTS_FIXEDBASE precomputed points (fields xy, yx, t2)
//   P     - output point; receives table[digit] or its negative
//   digit - index of the entry to select. It is not range-checked here; callers are expected
//           to pass a value in [0, VPOINTS_FIXEDBASE). The parameter is a by-value copy and is
//           decremented locally while scanning.
//   sign  - 0 to return table[digit] unchanged, 0xFF...F (i.e. -1) to return its negative.
//           NOTE(review): other values are not handled meaningfully (the AVX2/portable paths
//           would mix bits of both candidates) - confirm callers only pass 0 or all ones.
//
// One of three implementations is compiled, depending on SIMD_SUPPORT: AVX2 (integer XOR/AND
// selection), AVX (floating-point blend selection) or portable word-by-word C.
void table_lookup_fixed_base(point_precomp_t* table, point_precomp_t P, unsigned int digit, unsigned int sign)
{ // Constant-time table lookup to extract a point represented as (x+y,y-x,2t) corresponding to extended twisted Edwards coordinates (X:Y:Z:T) with Z=1
// Inputs: sign, digit, table containing VPOINTS_FIXEDBASE = 2^(W_FIXEDBASE-1) points
// Output: if sign=0 then P = table[digit], else if (sign=-1) then P = -table[digit]
#if (SIMD_SUPPORT == AVX2_SUPPORT)
// AVX2 path: each of the three fields (xy, yx, t2) is handled as one 256-bit vector.
__m256i point[3], temp_point[3], full_mask;
unsigned int i;
int mask;
point[0] = _mm256_loadu_si256((__m256i*)table[0]->xy); // point = table[0]
point[1] = _mm256_loadu_si256((__m256i*)table[0]->yx);
point[2] = _mm256_loadu_si256((__m256i*)table[0]->t2);
for (i = 1; i < VPOINTS_FIXEDBASE; i++)
{
digit--;
// While digit>=0 mask = 0xFF...F else mask = 0x00...0
// (the top bit of the unsigned digit becomes 1 once the decrement wraps below zero, so
// "top bit - 1" is -1 before the wrap and 0 after it)
mask = (int)(digit >> (8*sizeof(digit)-1)) - 1;
temp_point[0] = _mm256_loadu_si256((__m256i*)table[i]->xy); // temp_point = table[i]
temp_point[1] = _mm256_loadu_si256((__m256i*)table[i]->yx);
temp_point[2] = _mm256_loadu_si256((__m256i*)table[i]->t2);
// If mask = 0x00...0 then point = point, else if mask = 0xFF...F then point = temp_point
full_mask = _mm256_set1_epi32(mask); // Broadcast the mask to all 32-bit lanes
// Selection without branching: point ^= (point ^ temp_point) & full_mask
temp_point[0] = _mm256_xor_si256(point[0], temp_point[0]);
temp_point[1] = _mm256_xor_si256(point[1], temp_point[1]);
temp_point[2] = _mm256_xor_si256(point[2], temp_point[2]);
point[0] = _mm256_xor_si256(_mm256_and_si256(temp_point[0], full_mask), point[0]);
point[1] = _mm256_xor_si256(_mm256_and_si256(temp_point[1], full_mask), point[1]);
point[2] = _mm256_xor_si256(_mm256_and_si256(temp_point[2], full_mask), point[2]);
}
// Build the negated candidate in temp_point: xy and yx are swapped and t2 is negated.
temp_point[2] = _mm256_loadu_si256((__m256i*)point+2);
temp_point[0] = _mm256_loadu_si256((__m256i*)point+1); // point: x+y,y-x,2dt coordinate, temp_point: y-x,x+y,-2dt coordinate
temp_point[1] = _mm256_loadu_si256((__m256i*)point);
full_mask = _mm256_set1_epi32((int)sign);
// NOTE(review): the +8 and +10 digit offsets presumably address the two halves of
// temp_point[2]; that holds only if digit_t is 64 bits wide - confirm.
fpneg1271((digit_t*)temp_point+8); // Negate 2dt coordinate
fpneg1271((digit_t*)temp_point+10);
// If sign = 0xFF...F then choose negative of the point, else keep point as is
point[0] = _mm256_xor_si256(_mm256_and_si256(_mm256_xor_si256(point[0], temp_point[0]), full_mask), point[0]);
point[1] = _mm256_xor_si256(_mm256_and_si256(_mm256_xor_si256(point[1], temp_point[1]), full_mask), point[1]);
point[2] = _mm256_xor_si256(_mm256_and_si256(_mm256_xor_si256(point[2], temp_point[2]), full_mask), point[2]);
_mm256_storeu_si256((__m256i*)P->xy, point[0]); // P = selected (possibly negated) point
_mm256_storeu_si256((__m256i*)P->yx, point[1]);
_mm256_storeu_si256((__m256i*)P->t2, point[2]);
#elif (SIMD_SUPPORT >= AVX_SUPPORT)
// AVX path: the same data is moved through __m256d registers and selected with
// _mm256_blendv_pd, which picks each 64-bit lane according to the sign bit of the
// corresponding lane of the mask operand.
__m256d point[3], temp_point[3], full_mask;
unsigned int i;
int mask;
point[0] = _mm256_loadu_pd((double const*)table[0]->xy); // point = table[0]
point[1] = _mm256_loadu_pd((double const*)table[0]->yx);
point[2] = _mm256_loadu_pd((double const*)table[0]->t2);
for (i = 1; i < VPOINTS_FIXEDBASE; i++)
{
digit--;
// While digit>=0 mask = 0xFF...F else mask = 0x00...0
mask = (int)(digit >> (8*sizeof(digit)-1)) - 1;
// (double)mask is -1.0 (sign bit set) when mask = -1 and 0.0 (sign bit clear) when mask = 0;
// only that sign bit matters to the blend below
full_mask = _mm256_set1_pd((double)mask);
temp_point[0] = _mm256_loadu_pd((double const*)table[i]->xy); // temp_point = table[i]
temp_point[1] = _mm256_loadu_pd((double const*)table[i]->yx);
temp_point[2] = _mm256_loadu_pd((double const*)table[i]->t2);
// If mask = 0x00...0 then point = point, else if mask = 0xFF...F then point = temp_point
point[0] = _mm256_blendv_pd(point[0], temp_point[0], full_mask);
point[1] = _mm256_blendv_pd(point[1], temp_point[1], full_mask);
point[2] = _mm256_blendv_pd(point[2], temp_point[2], full_mask);
}
// Build the negated candidate in temp_point: xy and yx are swapped and t2 is negated.
// The pointer offsets are in doubles, four per __m256d element of point[].
temp_point[2] = _mm256_loadu_pd((double const*)point+2*4); // point: x+y,y-x,2dt coordinate, temp_point: y-x,x+y,-2dt coordinate
temp_point[0] = _mm256_loadu_pd((double const*)point+1*4);
temp_point[1] = _mm256_loadu_pd((double const*)point);
full_mask = _mm256_set1_pd((double)((int)sign)); // -1.0 if sign = 0xFF...F, 0.0 if sign = 0
// NOTE(review): the +8 and +10 digit offsets presumably address the two halves of
// temp_point[2]; that holds only if digit_t is 64 bits wide - confirm.
fpneg1271((digit_t*)temp_point+8); // Negate 2dt coordinate
fpneg1271((digit_t*)temp_point+10);
point[0] = _mm256_blendv_pd(point[0], temp_point[0], full_mask); // If sign = 0xFF...F then choose negative of the point
point[1] = _mm256_blendv_pd(point[1], temp_point[1], full_mask);
point[2] = _mm256_blendv_pd(point[2], temp_point[2], full_mask);
_mm256_storeu_pd((double*)P->xy, point[0]); // P = selected (possibly negated) point
_mm256_storeu_pd((double*)P->yx, point[1]);
_mm256_storeu_pd((double*)P->t2, point[2]);
#else
// Portable path: the same selection done one digit_t word at a time.
point_precomp_t point, temp_point;
unsigned int i, j;
digit_t mask;
ecccopy_precomp_fixed_base(table[0], point); // point = table[0]
for (i = 1; i < VPOINTS_FIXEDBASE; i++)
{
digit--;
// While digit>=0 mask = 0xFF...F else mask = 0x00...0
// (computed in digit_t so the all-ones value covers a full word)
mask = (digit_t)(digit >> (8*sizeof(digit)-1)) - 1;
ecccopy_precomp_fixed_base(table[i], temp_point); // temp_point = table[i]
// If mask = 0x00...0 then point = point, else if mask = 0xFF...F then point = temp_point
// Each word is updated as point ^= mask & (point ^ temp_point), for both components
// ([0] and [1]) of each of the three fields.
for (j = 0; j < NWORDS_FIELD; j++) {
point->xy[0][j] = (mask & (point->xy[0][j] ^ temp_point->xy[0][j])) ^ point->xy[0][j];
point->xy[1][j] = (mask & (point->xy[1][j] ^ temp_point->xy[1][j])) ^ point->xy[1][j];
point->yx[0][j] = (mask & (point->yx[0][j] ^ temp_point->yx[0][j])) ^ point->yx[0][j];
point->yx[1][j] = (mask & (point->yx[1][j] ^ temp_point->yx[1][j])) ^ point->yx[1][j];
point->t2[0][j] = (mask & (point->t2[0][j] ^ temp_point->t2[0][j])) ^ point->t2[0][j];
point->t2[1][j] = (mask & (point->t2[1][j] ^ temp_point->t2[1][j])) ^ point->t2[1][j];
}
}
// Build the negated candidate in temp_point: xy and yx are swapped and t2 is negated.
fp2copy1271(point->t2, temp_point->t2);
fp2copy1271(point->xy, temp_point->yx); // point: x+y,y-x,2dt coordinate, temp_point: y-x,x+y,-2dt coordinate
fp2copy1271(point->yx, temp_point->xy);
fpneg1271(temp_point->t2[0]); // Negate 2dt coordinate
fpneg1271(temp_point->t2[1]);
// (digit_t)((int)sign) widens sign through a signed int, so 0xFF...F in sign becomes an
// all-ones digit_t word and 0 stays 0.
for (j = 0; j < NWORDS_FIELD; j++) { // If sign = 0xFF...F then choose negative of the point
point->xy[0][j] = ((digit_t)((int)sign) & (point->xy[0][j] ^ temp_point->xy[0][j])) ^ point->xy[0][j];
point->xy[1][j] = ((digit_t)((int)sign) & (point->xy[1][j] ^ temp_point->xy[1][j])) ^ point->xy[1][j];
point->yx[0][j] = ((digit_t)((int)sign) & (point->yx[0][j] ^ temp_point->yx[0][j])) ^ point->yx[0][j];
point->yx[1][j] = ((digit_t)((int)sign) & (point->yx[1][j] ^ temp_point->yx[1][j])) ^ point->yx[1][j];
point->t2[0][j] = ((digit_t)((int)sign) & (point->t2[0][j] ^ temp_point->t2[0][j])) ^ point->t2[0][j];
point->t2[1][j] = ((digit_t)((int)sign) & (point->t2[1][j] ^ temp_point->t2[1][j])) ^ point->t2[1][j];
}
ecccopy_precomp_fixed_base(point, P); // P = selected (possibly negated) point
#endif
}