in src/sha3/fips202x4.c [37:97]
/*
 * Absorb four messages of identical length into a 4-way interleaved state.
 *
 * The state `s` is accessed as an array of 64-bit words in which word
 * 4*i + j is the i-th word belonging to message j (j = 0..3).
 *
 * s     - interleaved state; updated in place.
 * r     - number of bytes consumed from each message per permutation call.
 *         The staging buffers below hold 200 bytes, so r must not exceed 200.
 *         NOTE(review): r is presumably also nonzero and a multiple of 8
 *         (only r / 8 whole words are XORed per block) - confirm with callers.
 * m0-m3 - the four input messages, each mlen bytes long.
 * mlen  - length in bytes of every message.
 * p     - byte stored immediately after the last message byte in the final
 *         block (presumably the domain-separation / padding suffix).
 *
 * Every full r-byte block is XORed into the state and followed by a call to
 * KeccakF1600_StatePermute4x. The final, padded block is XORed into the state
 * but no permutation is applied to it in this function.
 */
static void keccak_absorb4x(__m256i *s, unsigned int r, const unsigned char *m0, const unsigned char *m1, const unsigned char *m2, const unsigned char *m3,
unsigned long long int mlen, unsigned char p)
{
  const unsigned char *msg[4];
  unsigned char tail[4][200];
  unsigned long long *lanes = (unsigned long long *)s;
  unsigned long long words = r / 8;
  unsigned long long w;
  unsigned long long k;
  unsigned int j;

  msg[0] = m0;
  msg[1] = m1;
  msg[2] = m2;
  msg[3] = m3;

  /* Consume every complete r-byte block of all four messages. */
  for (; mlen >= r; mlen -= r)
  {
    for (w = 0; w < words; ++w)
    {
      for (j = 0; j < 4; ++j)
      {
        lanes[4 * w + j] ^= load64(msg[j] + 8 * w);
      }
    }
    KeccakF1600_StatePermute4x(s);
    for (j = 0; j < 4; ++j)
    {
      msg[j] += r;
    }
  }

  /*
   * Here mlen < r. Build the last block for each message: the remaining
   * message bytes, then p, zero fill, and the top bit of byte r - 1 set.
   */
  for (j = 0; j < 4; ++j)
  {
    for (k = 0; k < r; ++k)
    {
      tail[j][k] = (k < mlen) ? msg[j][k] : 0;
    }
    tail[j][mlen] = p;
    /* OR rather than assign: when mlen == r - 1 this byte already holds p. */
    tail[j][r - 1] |= 128;
  }

  /* Fold the padded blocks into the state; no permutation follows here. */
  for (w = 0; w < words; ++w)
  {
    for (j = 0; j < 4; ++j)
    {
      lanes[4 * w + j] ^= load64(tail[j] + 8 * w);
    }
  }
}