void gf2x_mul_base_port()

in src/gf2x/gf2x_mul_base_portable.c [19:66]


void gf2x_mul_base_port(OUT uint64_t *c,
                        IN const uint64_t *a,
                        IN const uint64_t *b)
{
  uint64_t       h = 0, l = 0, g1, g2, u[8];
  const uint64_t w  = 64;
  const uint64_t s  = 3;
  const uint64_t a0 = a[0];
  const uint64_t b0 = b[0];

  // Multiplying 64 bits by 7 can results in an overflow of 3 bits.
  // Therefore, these bits are masked out, and are treated in step 3.
  const uint64_t b0m = b0 & MASK(61);

  // Step 1: Calculate a multiplication table with 8 entries.
  u[0] = 0;
  u[1] = b0m;
  u[2] = u[1] << 1;
  u[3] = u[2] ^ b0m;
  u[4] = u[2] << 1;
  u[5] = u[4] ^ b0m;
  u[6] = u[3] << 1;
  u[7] = u[6] ^ b0m;

  // Step 2: Multiply two elements in parallel in positions i, i+s
  l = u[LSB3(a0)] ^ (u[LSB3(a0 >> 3)] << 3);
  h = (u[LSB3(a0 >> 3)] >> 61);

  for(size_t i = (2 * s); i < w; i += (2 * s)) {
    const size_t i2 = (i + s);

    g1 = u[LSB3(a0 >> i)];
    g2 = u[LSB3(a0 >> i2)];

    l ^= (g1 << i) ^ (g2 << i2);
    h ^= (g1 >> (w - i)) ^ (g2 >> (w - i2));
  }

  // Step 3: Multiply the last three bits.
  for(size_t i = 61; i < 64; i++) {
    uint64_t mask = (-((b0 >> i) & 1));
    l ^= ((a0 << i) & mask);
    h ^= ((a0 >> (w - i)) & mask);
  }

  c[0] = l;
  c[1] = h;
}