in source/intel/asm/crc32c_sse42_asm.c [214:285]
/**
 * Folds exactly 3072 bytes starting at `input` into the running CRC `crc`
 * using the SSE4.2 crc32q instruction, and returns the updated CRC.
 *
 * The 3072 bytes are processed as three independent 1024-byte stripes
 * (offsets 0, 1024 and 2048). Each of the 16 loop iterations consumes 64 bytes
 * from every stripe, with the crc32q instructions of the three stripes
 * interleaved. The three partial CRCs are then merged by the FOLD_K1K2 macro.
 *
 * @param input  start of the data; the loop reads byte offsets 0..3071, so at
 *               least 3072 bytes must be readable. Only the local copy of the
 *               pointer is advanced; the caller's pointer is unaffected.
 * @param crc    running CRC on entry (seed of stripe 0, held in ecx/rcx).
 * @return       the value left in ecx once the asm has finished.
 *
 * NOTE(review): FOLD_K1K2 is defined elsewhere in this file. The comments on
 * it below follow the original author's notes - confirm against the macro.
 */
static inline uint32_t s_crc32c_sse42_clmul_3072(const uint8_t *input, uint32_t crc) {
    /* "%=" expands to a number unique to this asm instance, which keeps the
       labels distinct if the function is inlined more than once. */
    __asm__ __volatile__(
        "enter_3072_%=:"
        "xor %%r11, %%r11 # zero all 64 bits in r11, will track crc1 \n"
        "xor %%r10, %%r10 # zero all 64 bits in r10, will track crc2 \n"
        "movl $16, %%r8d # Loop 16 times through 64 byte chunks in 3 parallel stripes \n"
        "loop_3072_%=:"
        /* Prefetch 128 bytes ahead of the current position in each stripe. */
        "prefetcht0 128(%[in]) # \n"
        "prefetcht0 1152(%[in]) # \n"
        "prefetcht0 2176(%[in]) # \n"
        /* Eight rounds of 8 bytes per stripe: rcx = stripe 0, r11 = stripe 1, r10 = stripe 2. */
        "crc32q 0(%[in]), %%rcx # crc0: stripe0 \n"
        "crc32q 1024(%[in]), %%r11 # crc1: stripe1 \n"
        "crc32q 2048(%[in]), %%r10 # crc2: stripe2 \n"
        "crc32q 8(%[in]), %%rcx # crc0: stripe0 \n"
        "crc32q 1032(%[in]), %%r11 # crc1: stripe1 \n"
        "crc32q 2056(%[in]), %%r10 # crc2: stripe2 \n"
        "crc32q 16(%[in]), %%rcx # crc0: stripe0 \n"
        "crc32q 1040(%[in]), %%r11 # crc1: stripe1 \n"
        "crc32q 2064(%[in]), %%r10 # crc2: stripe2 \n"
        "crc32q 24(%[in]), %%rcx # crc0: stripe0 \n"
        "crc32q 1048(%[in]), %%r11 # crc1: stripe1 \n"
        "crc32q 2072(%[in]), %%r10 # crc2: stripe2 \n"
        "crc32q 32(%[in]), %%rcx # crc0: stripe0 \n"
        "crc32q 1056(%[in]), %%r11 # crc1: stripe1 \n"
        "crc32q 2080(%[in]), %%r10 # crc2: stripe2 \n"
        "crc32q 40(%[in]), %%rcx # crc0: stripe0 \n"
        "crc32q 1064(%[in]), %%r11 # crc1: stripe1 \n"
        "crc32q 2088(%[in]), %%r10 # crc2: stripe2 \n"
        "crc32q 48(%[in]), %%rcx # crc0: stripe0 \n"
        "crc32q 1072(%[in]), %%r11 # crc1: stripe1 \n"
        "crc32q 2096(%[in]), %%r10 # crc2: stripe2 \n"
        "crc32q 56(%[in]), %%rcx # crc0: stripe0 \n"
        "crc32q 1080(%[in]), %%r11 # crc1: stripe1 \n"
        "crc32q 2104(%[in]), %%r10 # crc2: stripe2 \n"
        /* Step all three stripes forward by 64 bytes and count the iteration down. */
        "add $64, %[in] # \n"
        "sub $1, %%r8d # \n"
        "jnz loop_3072_%= # \n"
        /* Magic constants used to fold the crc stripes into ecx. */
        FOLD_K1K2(
            3072,
            $0xa51b6135,
            $0x170076fa)
        /* Output registers:
           [crc] is both an input and an output, so it is marked read/write ("+c").
           The register holding [in] is modified by the add instruction, so it is
           also marked read/write ("+d") to tell the compiler about the change. */
        : "+c"(crc), "+d"(input)
        /* Input registers:
           these name the operands ([crc], [in]) and pin them to the same
           registers as the read/write outputs above. */
        : [ crc ] "c"(crc), [ in ] "d"(input)
        /* Additional clobbers:
           r8, r10 and r11 are written directly above; r9 and xmm1-xmm4 are
           presumably scratch registers of FOLD_K1K2 (confirm against the macro).
           "cc": add/sub modify the flags.
           "memory": the asm reads the buffer behind `input`, which is not listed
           as a memory operand; this clobber keeps the compiler from moving or
           dropping earlier stores to that buffer across the asm. */
        : "%r8", "%r9", "%r11", "%r10", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory");
    return crc;
}