in common/checksum/crc.cpp [567:616]
// Updates a running CRC (CRC-64/ECMA, going by the function name) over
// data[0, nbytes) using the 128-bit primitives of the `SSE` wrapper.
//
// Parameters:
//   data   - input bytes. If null, `crc` is returned unchanged.
//   nbytes - number of input bytes. If zero, `crc` is returned unchanged.
//   crc    - running CRC value. The accumulator is seeded with ~crc, and the
//            final 64-bit result is inverted again before being returned.
//   hw_big - callable used only when nbytes >= 256; invoked as
//            hw_big(data, nbytes, crc) (note: `crc`, not `~crc`) and its
//            return value replaces the accumulator.
//            NOTE(review): the code after the call keeps using `data` and
//            `nbytes`, so hw_big presumably takes both by reference, consumes
//            the bulk of the input and leaves the rest for the loop below --
//            confirm against its definition.
// Returns: the updated CRC value.
//
// Control flow: inputs of 16 bytes or more go through the 16-byte fold loop
// and the optional tail step, then fall through `_128_done` and `_barrett`.
// Inputs of 8..15 bytes jump straight to `_128_done`; inputs of 1..7 bytes
// jump straight to `_barrett`.
template<typename F> inline __attribute__((always_inline))
uint64_t crc64ecma_hw_portable(const uint8_t *data, size_t nbytes, uint64_t crc, F hw_big) {
// Nothing to process: hand back the caller's CRC untouched.
if (unlikely(!nbytes || !data)) return crc;
// NOTE(review): RK(...) / MASK(...) below are defined elsewhere in the file and
// may rely on these local alias names -- check before renaming them.
using SIMD = SSE;
using v128 = typename SIMD::v128;
// Accumulator, named after the register it stands for. Only the first element
// is given explicitly (~crc); the rest is presumably zero-initialised.
v128 xmm7 = {(long)~crc};
// `ptr` is a reference to `data` itself, viewed as a pointer to v128, so every
// `ptr++` also advances `data` by one v128 (paired with `nbytes -= 16` below).
auto& ptr = (const v128*&)data;
if (nbytes >= 256) {
// Large input: delegate to the caller-supplied bulk routine.
xmm7 = hw_big(data, nbytes, crc);
} else if (nbytes >= 16) {
// Medium input: XOR the first 16-byte block into the seeded accumulator.
xmm7 ^= SIMD::loadu(ptr++);
nbytes -= 16;
} else /* 0 < nbytes < 16*/ {
// Short input: load the partial block with load_small (presumably without
// reading past the end of the buffer -- confirm), then reposition the bytes
// with a shuffle control taken from the table returned by get_shf_table().
xmm7 ^= SIMD::load_small(data, nbytes);
if (nbytes >= 8) {
// 8..15 bytes: still needs the `_128_done` step before the final reduction.
auto shf = SIMD::loadu(get_shf_table(nbytes));
xmm7 = SIMD::pshufb(xmm7, shf);
goto _128_done;
} else {
// 1..7 bytes: the table index is offset by 8 and the `_128_done` step is
// skipped; only the final reduction runs.
auto shf = SIMD::loadu(get_shf_table(nbytes + 8));
xmm7 = SIMD::pshufb(xmm7, shf);
goto _barrett;
}
}
// Main loop: combine the accumulator with constant RK(1) via SIMD::op
// (presumably a carry-less-multiply fold -- see its definition) and XOR in the
// next 16-byte block, until fewer than 16 bytes remain.
while (nbytes >= 16) {
xmm7 = SIMD::op(xmm7, RK(1)) ^ SIMD::loadu(ptr++);
nbytes -= 16;
}
if (nbytes) {
// 1..15 trailing bytes remain. Load the LAST 16 bytes of the input, which
// overlap bytes already consumed. This reads before `data`, which is valid
// only because at least 16 bytes were consumed earlier: the 16..255 branch
// always consumes one block, and the hw_big path presumably does as well.
auto p = data + nbytes - 16;
auto remainder = SIMD::loadu((v128*)p);
// Shuffle control for this tail length.
auto xmm0 = SIMD::loadu(get_shf_table(nbytes));
auto xmm2 = xmm7;
xmm7 = SIMD::pshufb(xmm7, xmm0);
// Statement order matters here: xmm0 is altered with MASK(3) between the two
// shuffles, and the altered value also serves as the blend selector.
xmm0 ^= MASK(3);
xmm2 = SIMD::pshufb(xmm2, xmm0);
// Merge the shuffled accumulator copy with the overlapping tail block.
xmm2 = SIMD::pblendvb(xmm2, remainder, xmm0);
// One more fold of the shifted accumulator, combined with the merged block.
xmm7 = xmm2 ^ SIMD::op(xmm7, RK(1));
}
_128_done:
// Reduce the 128-bit accumulator: a pclmulqdq<0> against RK(5), XORed with
// bsr8(xmm7) (presumably a byte shift right by 8 -- confirm in SIMD).
xmm7 = SIMD::pclmulqdq<0>(xmm7, RK(5)) ^ SIMD::bsr8(xmm7);
_barrett:
// Final reduction (a Barrett reduction, going by the label name), using two
// multiplies against RK(7) with different lane selectors (0 and 0x10).
auto t = SIMD::pclmulqdq<0>(xmm7, RK(7));
xmm7 ^= SIMD::pclmulqdq<0x10>(t, RK(7)) ^ SIMD::bsl8(t);
// The result is the second 64-bit element of the accumulator, inverted to
// undo the inversion applied when seeding.
auto p = (uint64_t*)&xmm7;
crc = ~p[1];
return crc;
}