void table_lookup_fixed_base()

in FourQ_64bit_and_portable/table_lookup.h [162:282]


void table_lookup_fixed_base(point_precomp_t* table, point_precomp_t P, unsigned int digit, unsigned int sign)
{ // Constant-time table lookup to extract a point represented as (x+y,y-x,2t) corresponding to extended twisted Edwards coordinates (X:Y:Z:T) with Z=1
  // Inputs: sign, digit, table containing VPOINTS_FIXEDBASE = 2^(W_FIXEDBASE-1) points
  //         sign is used directly as a select mask, so it is expected to be either 0 or 0xFF...F (all ones)
  // Output: if sign=0 then P = table[digit], else if (sign=-1) then P = -table[digit]
  //
  // Every table entry is loaded regardless of digit, and the wanted entry is kept through masking rather than
  // indexing or branching. The negated point is always computed and then selected (or not) with the sign mask.
  // The three preprocessor branches below perform the same sequence with AVX2 integer operations, AVX
  // floating-point blends, or portable word-wise masking.
  // NOTE(review): digit is presumably expected to be < VPOINTS_FIXEDBASE - confirm against callers.

#if (SIMD_SUPPORT == AVX2_SUPPORT)
    // Each __m256i holds one 256-bit coordinate: [0] = xy, [1] = yx, [2] = t2
    __m256i point[3], temp_point[3], full_mask; 
    unsigned int i;
    int mask;
    
    point[0] = _mm256_loadu_si256((__m256i*)table[0]->xy);                  // point = table[0] 
    point[1] = _mm256_loadu_si256((__m256i*)table[0]->yx);  
    point[2] = _mm256_loadu_si256((__m256i*)table[0]->t2); 

    for (i = 1; i < VPOINTS_FIXEDBASE; i++) 
    { 
        digit--;
        // digit is unsigned: once it is decremented past zero it wraps around and its top bit becomes set.
        // While digit has not wrapped mask = 0xFF...F, else mask = 0x00...0
        mask = (int)(digit >> (8*sizeof(digit)-1)) - 1;
        temp_point[0] = _mm256_loadu_si256((__m256i*)table[i]->xy);         // temp_point = table[i]
        temp_point[1] = _mm256_loadu_si256((__m256i*)table[i]->yx);
        temp_point[2] = _mm256_loadu_si256((__m256i*)table[i]->t2);
        // If mask = 0x00...0 then point = point, else if mask = 0xFF...F then point = temp_point
        full_mask = _mm256_set1_epi32(mask);                                // Broadcast mask to all eight 32-bit lanes
        // Masked select: point ^= (point ^ temp_point) & full_mask
        temp_point[0] = _mm256_xor_si256(point[0], temp_point[0]);
        temp_point[1] = _mm256_xor_si256(point[1], temp_point[1]);
        temp_point[2] = _mm256_xor_si256(point[2], temp_point[2]);
        point[0] = _mm256_xor_si256(_mm256_and_si256(temp_point[0], full_mask), point[0]);
        point[1] = _mm256_xor_si256(_mm256_and_si256(temp_point[1], full_mask), point[1]);
        point[2] = _mm256_xor_si256(_mm256_and_si256(temp_point[2], full_mask), point[2]);
    }
                                
    // Build the negated candidate in temp_point: swap the x+y and y-x coordinates and negate 2dt.
    // (__m256i*)point+k is the address of point[k].
    temp_point[2] = _mm256_loadu_si256((__m256i*)point+2); 
    temp_point[0] = _mm256_loadu_si256((__m256i*)point+1);                  // point: x+y,y-x,2dt coordinate, temp_point: y-x,x+y,-2dt coordinate 
    temp_point[1] = _mm256_loadu_si256((__m256i*)point);
    full_mask = _mm256_set1_epi32((int)sign); 
    // Offsets 8 and 10 are counted in digit_t units from the start of temp_point.
    // NOTE(review): assumes digit_t is 64 bits, so that +8 and +10 address the two halves of temp_point[2] - confirm.
    fpneg1271((digit_t*)temp_point+8);                                      // Negate 2dt coordinate
    fpneg1271((digit_t*)temp_point+10);                                     // If sign = 0xFF...F then choose negative of the point
    point[0] = _mm256_xor_si256(_mm256_and_si256(_mm256_xor_si256(point[0], temp_point[0]), full_mask), point[0]);
    point[1] = _mm256_xor_si256(_mm256_and_si256(_mm256_xor_si256(point[1], temp_point[1]), full_mask), point[1]);
    point[2] = _mm256_xor_si256(_mm256_and_si256(_mm256_xor_si256(point[2], temp_point[2]), full_mask), point[2]);
    _mm256_storeu_si256((__m256i*)P->xy, point[0]);                         // P = selected point
    _mm256_storeu_si256((__m256i*)P->yx, point[1]);     
    _mm256_storeu_si256((__m256i*)P->t2, point[2]);
        
#elif (SIMD_SUPPORT >= AVX_SUPPORT)
    // Each __m256d holds one 256-bit coordinate: [0] = xy, [1] = yx, [2] = t2.
    // The values are only loaded, blended and stored as raw bits; no floating-point arithmetic is applied to them.
    __m256d point[3], temp_point[3], full_mask; 
    unsigned int i;
    int mask;

    point[0] = _mm256_loadu_pd((double const*)table[0]->xy);                // point = table[0] 
    point[1] = _mm256_loadu_pd((double const*)table[0]->yx);  
    point[2] = _mm256_loadu_pd((double const*)table[0]->t2);  

    for (i = 1; i < VPOINTS_FIXEDBASE; i++) 
    { 
        digit--;
        // digit is unsigned: once it is decremented past zero it wraps around and its top bit becomes set.
        // While digit has not wrapped mask = 0xFF...F (-1), else mask = 0x00...0
        mask = (int)(digit >> (8*sizeof(digit)-1)) - 1;
        // (double)mask is -1.0 or 0.0. _mm256_blendv_pd only inspects the sign bit of each 64-bit mask lane,
        // so -1.0 selects the second operand in every lane and 0.0 keeps the first.
        full_mask = _mm256_set1_pd((double)mask);
        temp_point[0] = _mm256_loadu_pd((double const*)table[i]->xy);       // temp_point = table[i]
        temp_point[1] = _mm256_loadu_pd((double const*)table[i]->yx);
        temp_point[2] = _mm256_loadu_pd((double const*)table[i]->t2);
        // If mask = 0x00...0 then point = point, else if mask = 0xFF...F then point = temp_point
        point[0] = _mm256_blendv_pd(point[0], temp_point[0], full_mask);     
        point[1] = _mm256_blendv_pd(point[1], temp_point[1], full_mask);    
        point[2] = _mm256_blendv_pd(point[2], temp_point[2], full_mask); 
    }
                                   
    // Build the negated candidate in temp_point: swap the x+y and y-x coordinates and negate 2dt.
    // Offsets are counted in doubles, four per __m256d, so +1*4 and +2*4 address point[1] and point[2].
    temp_point[2] = _mm256_loadu_pd((double const*)point+2*4);              // point: x+y,y-x,2dt coordinate, temp_point: y-x,x+y,-2dt coordinate
    temp_point[0] = _mm256_loadu_pd((double const*)point+1*4);  
    temp_point[1] = _mm256_loadu_pd((double const*)point);  
    full_mask = _mm256_set1_pd((double)((int)sign));                        // sign = 0xFF...F becomes -1.0 (sign bit set), sign = 0 becomes 0.0
    // Offsets 8 and 10 are counted in digit_t units from the start of temp_point.
    // NOTE(review): assumes digit_t is 64 bits, so that +8 and +10 address the two halves of temp_point[2] - confirm.
    fpneg1271((digit_t*)temp_point+8);                                      // Negate 2dt coordinate
    fpneg1271((digit_t*)temp_point+10);                                    
    point[0] = _mm256_blendv_pd(point[0], temp_point[0], full_mask);        // If sign = 0xFF...F then choose negative of the point
    point[1] = _mm256_blendv_pd(point[1], temp_point[1], full_mask);
    point[2] = _mm256_blendv_pd(point[2], temp_point[2], full_mask);
    _mm256_storeu_pd((double*)P->xy, point[0]);                             // P = selected point
    _mm256_storeu_pd((double*)P->yx, point[1]); 
    _mm256_storeu_pd((double*)P->t2, point[2]);
#else
    // Portable version: the same select-and-negate sequence, done one digit_t word at a time
    point_precomp_t point, temp_point;
    unsigned int i, j;
    digit_t mask;
                                   
    ecccopy_precomp_fixed_base(table[0], point);                             // point = table[0]

    for (i = 1; i < VPOINTS_FIXEDBASE; i++)
    {
        digit--;
        // digit is unsigned: once it is decremented past zero it wraps around and its top bit becomes set.
        // While digit has not wrapped mask = 0xFF...F, else mask = 0x00...0
        mask = (digit_t)(digit >> (8*sizeof(digit)-1)) - 1;
        ecccopy_precomp_fixed_base(table[i], temp_point);                    // temp_point = table[i] 
        // If mask = 0x00...0 then point = point, else if mask = 0xFF...F then point = temp_point
        // Masked select per word: point ^= (point ^ temp_point) & mask
        for (j = 0; j < NWORDS_FIELD; j++) {
            point->xy[0][j] = (mask & (point->xy[0][j] ^ temp_point->xy[0][j])) ^ point->xy[0][j];
            point->xy[1][j] = (mask & (point->xy[1][j] ^ temp_point->xy[1][j])) ^ point->xy[1][j];
            point->yx[0][j] = (mask & (point->yx[0][j] ^ temp_point->yx[0][j])) ^ point->yx[0][j];
            point->yx[1][j] = (mask & (point->yx[1][j] ^ temp_point->yx[1][j])) ^ point->yx[1][j];
            point->t2[0][j] = (mask & (point->t2[0][j] ^ temp_point->t2[0][j])) ^ point->t2[0][j];
            point->t2[1][j] = (mask & (point->t2[1][j] ^ temp_point->t2[1][j])) ^ point->t2[1][j];
        }
    }
    
    // Build the negated candidate in temp_point: swap the x+y and y-x coordinates and negate 2dt
    fp2copy1271(point->t2, temp_point->t2);
    fp2copy1271(point->xy, temp_point->yx);                                  // point: x+y,y-x,2dt coordinate, temp_point: y-x,x+y,-2dt coordinate
    fp2copy1271(point->yx, temp_point->xy);                                   
    fpneg1271(temp_point->t2[0]);                                            // Negate 2dt coordinate
    fpneg1271(temp_point->t2[1]);             
    // (digit_t)((int)sign) converts sign through int before widening to digit_t, so that
    // sign = 0xFF...F yields an all-ones digit_t mask
    for (j = 0; j < NWORDS_FIELD; j++) {                                     // If sign = 0xFF...F then choose negative of the point
        point->xy[0][j] = ((digit_t)((int)sign) & (point->xy[0][j] ^ temp_point->xy[0][j])) ^ point->xy[0][j];
        point->xy[1][j] = ((digit_t)((int)sign) & (point->xy[1][j] ^ temp_point->xy[1][j])) ^ point->xy[1][j];
        point->yx[0][j] = ((digit_t)((int)sign) & (point->yx[0][j] ^ temp_point->yx[0][j])) ^ point->yx[0][j];
        point->yx[1][j] = ((digit_t)((int)sign) & (point->yx[1][j] ^ temp_point->yx[1][j])) ^ point->yx[1][j];
        point->t2[0][j] = ((digit_t)((int)sign) & (point->t2[0][j] ^ temp_point->t2[0][j])) ^ point->t2[0][j];
        point->t2[1][j] = ((digit_t)((int)sign) & (point->t2[1][j] ^ temp_point->t2[1][j])) ^ point->t2[1][j];
    }                                  
    ecccopy_precomp_fixed_base(point, P);                                    // P = selected point
#endif
}