in zlib/crc32.c [575:665]
/* Update the running CRC `crc` with the `len` bytes at `buf` and return the
   result. The work is done with the crc32b (one byte) and crc32x (one 64-bit
   word) instructions issued through inline assembly. When `buf` is Z_NULL the
   function returns 0, the initial CRC value. */
unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
                              z_size_t len) {
    z_crc_t octet;              /* next byte handed to crc32b */
    z_crc_t shift;              /* combining operand returned by x2nmodp() */
    z_word_t lane1, lane2;      /* CRCs of the second and third word streams */
    z_word_t w0, w1, w2;        /* words loaded for the three streams */
    const z_word_t *wp;         /* read position, in 64-bit words */
    z_size_t words;             /* whole words still to be processed */
    z_size_t third, twice, k;

    /* A null buffer is a request for the initial CRC value. */
    if (buf == Z_NULL) {
        return 0;
    }

#ifdef DYNAMIC_CRC_TABLE
    once(&made, make_crc_table);
#endif /* DYNAMIC_CRC_TABLE */

    /* Pre-condition: invert the CRC and keep only its low 32 bits. */
    crc = ~crc & 0xffffffff;

    /* Consume bytes one at a time until the address is a multiple of eight
       or the input runs out. */
    for (; len != 0 && ((z_size_t)buf & 7) != 0; len--) {
        octet = *buf++;
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(octet));
    }

    /* Split what is left into whole 64-bit words wp[0..words-1] followed by
       at most seven trailing bytes. */
    wp = (const z_word_t *)buf;
    words = len >> 3;
    len &= 7;

    /* Full-size batches: run three CRCs side by side, each over Z_BATCH
       words, so that one crc32x instruction can be issued per cycle. The
       three results are folded into `crc` once the batch is finished. */
    for (; words >= 3 * Z_BATCH; words -= 3 * Z_BATCH) {
        lane1 = 0;
        lane2 = 0;
        for (k = 0; k < Z_BATCH; k++) {
            w0 = wp[k];
            w1 = wp[k + Z_BATCH];
            w2 = wp[k + 2 * Z_BATCH];
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(w0));
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(lane1) : "r"(w1));
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(lane2) : "r"(w2));
        }
        wp += 3 * Z_BATCH;

        /* NOTE(review): multmodp() and Z_BATCH_ZEROS are defined elsewhere;
           presumably each call advances `crc` past one stream's worth of
           data before the next lane is XORed in -- confirm. */
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ lane1;
        crc = multmodp(Z_BATCH_ZEROS, crc) ^ lane2;
    }

    /* One final, shorter three-way batch, taken only when each stream would
       get at least Z_BATCH_MIN words -- enough to pay for the combination. */
    third = words / 3;
    if (third >= Z_BATCH_MIN) {
        twice = third + third;
        lane1 = 0;
        lane2 = 0;
        for (k = 0; k < third; k++) {
            w0 = wp[k];
            w1 = wp[k + third];
            w2 = wp[k + twice];
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(w0));
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(lane1) : "r"(w1));
            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(lane2) : "r"(w2));
        }
        wp += 3 * third;
        words -= 3 * third;

        /* NOTE(review): x2nmodp() is defined elsewhere; presumably it yields
           the combining operand for a stream of `third` words -- confirm. */
        shift = x2nmodp(third, 6);
        crc = multmodp(shift, crc) ^ lane1;
        crc = multmodp(shift, crc) ^ lane2;
    }

    /* Whatever words are left go through a single, non-interleaved CRC. */
    for (; words != 0; words--) {
        w0 = *wp++;
        __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(w0));
    }

    /* Finish with the trailing bytes that did not fill a word. */
    buf = (const unsigned char FAR *)wp;
    for (; len != 0; len--) {
        octet = *buf++;
        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(octet));
    }

    /* Post-condition the CRC and hand it back. */
    return crc ^ 0xffffffff;
}