in libgo/runtime/aeshash.c [597:973]
// aeshash_round applies vaeseq_u8 (AESE) to state with key and then
// passes the result through vaesmcq_u8 (AESMC).  Every non-final mixing
// step in aeshashbody is this pair.
static inline uint8x16_t aeshash_round(uint8x16_t state, uint8x16_t key) {
	return vaesmcq_u8(vaeseq_u8(state, key));
}

// aeshashbody hashes the size bytes starting at p using the AArch64 AES
// instructions.
//
// p           - start of the data to hash; no particular alignment is
//               required for the 16-byte vector loads.
// seed        - caller supplied hash seed.
// size        - number of bytes at p.
// aeskeysched - per-process key material.  The first 16 bytes of
//               __values are always read, 32 bytes for size > 16,
//               64 bytes for size > 32 and 128 bytes for size > 64.
//
// Returns lane 0 (the first 64 bits) of the final mixed vector.
//
// All 128-bit loads go through vld1q_u8 and friends rather than through
// a dereferenced uint8x16_t pointer: neither p nor the key schedule is
// known to be 16-byte aligned, and dereferencing a misaligned vector
// pointer is undefined behaviour.
uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
	const uint8 *data;
	const uint8 *keys;
	uintptr blocks;
	uint64x2_t vinit64;
	uint8x16_t vinit;
	uint8x16_t vseed, vseed2, vseed3, vseed4;
	uint8x16_t vseed5, vseed6, vseed7, vseed8;
	uint8x16_t vval, vval2, vval3, vval4;
	uint8x16_t vval5, vval6, vval7, vval8;
	uint8x16_t vvalLoop, vvalLoop2, vvalLoop3, vvalLoop4;
	uint8x16_t vvalLoop5, vvalLoop6, vvalLoop7, vvalLoop8;
	uint8x16x2_t avval2;
	uint8x16x3_t avseed3;

	data = (const uint8*)(p);
	keys = (const uint8*)(aeskeysched.__values);

	// Combined hash seed and length.
	vinit64 = vdupq_n_u64(0);
	vinit64[0] = (uint64)seed;
	vinit64[1] = (uint64)size;
	vinit = vreinterpretq_u8_u64(vinit64);

	// Mix in per-process seed, then scramble it.
	vseed = aeshash_round(vld1q_u8(keys), vinit);
	// keys now points at the second 16-byte key.
	keys += 16;

	if (size <= 16) {
		if (size == 0) {
			// Return 64 bits of scrambled input seed.
			return vreinterpretq_u64_u8(vseed)[0];
		} else if (size < 16) {
			// Assemble the 1..15 input bytes in an otherwise zero
			// vector.  Each power-of-two piece has a fixed slot:
			// 8 bytes -> bytes 0-7, 4 bytes -> bytes 8-11,
			// 2 bytes -> bytes 12-13, 1 byte -> byte 14.
			vval = vreinterpretq_u8_u64(vdupq_n_u64(0));
			if ((size & 8) != 0) {
				vval = vreinterpretq_u8_u64(vld1q_lane_u64((const uint64_t*)(data), vreinterpretq_u64_u8(vval), 0));
				data += 8;
			}
			if ((size & 4) != 0) {
				vval = vreinterpretq_u8_u32(vld1q_lane_u32((const uint32_t*)(data), vreinterpretq_u32_u8(vval), 2));
				data += 4;
			}
			if ((size & 2) != 0) {
				vval = vreinterpretq_u8_u16(vld1q_lane_u16((const uint16_t*)(data), vreinterpretq_u16_u8(vval), 6));
				data += 2;
			}
			if ((size & 1) != 0) {
				vval = vld1q_lane_u8(data, vval, 14);
			}
		} else {
			// Exactly 16 bytes.
			vval = vld1q_u8(data);
		}

		vval = aeshash_round(vval, vseed);
		vval = aeshash_round(vval, vseed);
		vval = vaeseq_u8(vval, vseed);
		return vreinterpretq_u64_u8(vval)[0];
	}

	if (size <= 32) {
		// Make a second seed.
		vseed2 = aeshash_round(vld1q_u8(keys), vinit);

		// First and last 16 bytes; they overlap when size < 32.
		vval = vld1q_u8(data);
		vval2 = vld1q_u8(data + (size - 16));

		vval = aeshash_round(vval, vseed);
		vval2 = aeshash_round(vval2, vseed2);

		vval = aeshash_round(vval, vseed);
		vval2 = aeshash_round(vval2, vseed2);

		vval = vaeseq_u8(vval, vseed);
		vval2 = vaeseq_u8(vval2, vseed2);

		vval ^= vval2;
		return vreinterpretq_u64_u8(vval)[0];
	}

	if (size <= 64) {
		// Make three more seeds.
		avseed3 = vld1q_u8_x3(keys);
		vseed2 = aeshash_round(avseed3.val[0], vinit);
		vseed3 = aeshash_round(avseed3.val[1], vinit);
		vseed4 = aeshash_round(avseed3.val[2], vinit);

		// First and last 32 bytes; they overlap when size < 64.
		avval2 = vld1q_u8_x2(data);
		vval = avval2.val[0];
		vval2 = avval2.val[1];
		avval2 = vld1q_u8_x2(data + (size - 32));
		vval3 = avval2.val[0];
		vval4 = avval2.val[1];

		vval = aeshash_round(vval, vseed);
		vval2 = aeshash_round(vval2, vseed2);
		vval3 = aeshash_round(vval3, vseed3);
		vval4 = aeshash_round(vval4, vseed4);

		vval = aeshash_round(vval, vseed);
		vval2 = aeshash_round(vval2, vseed2);
		vval3 = aeshash_round(vval3, vseed3);
		vval4 = aeshash_round(vval4, vseed4);

		vval = vaeseq_u8(vval, vseed);
		vval2 = vaeseq_u8(vval2, vseed2);
		vval3 = vaeseq_u8(vval3, vseed3);
		vval4 = vaeseq_u8(vval4, vseed4);

		vval ^= vval3;
		vval2 ^= vval4;
		vval ^= vval2;
		return vreinterpretq_u64_u8(vval)[0];
	}

	// size > 64 from here on: eight seeds are needed.  The original
	// code notes that vld1q_u8_x4 was not available, so the remaining
	// seven keys are fetched as 3 + 3 + 1 vectors.
	avseed3 = vld1q_u8_x3(keys);
	vseed2 = aeshash_round(avseed3.val[0], vinit);
	vseed3 = aeshash_round(avseed3.val[1], vinit);
	vseed4 = aeshash_round(avseed3.val[2], vinit);
	avseed3 = vld1q_u8_x3(keys + 3 * 16);
	vseed5 = aeshash_round(avseed3.val[0], vinit);
	vseed6 = aeshash_round(avseed3.val[1], vinit);
	vseed7 = aeshash_round(avseed3.val[2], vinit);
	vseed8 = aeshash_round(vld1q_u8(keys + 6 * 16), vinit);

	if (size <= 128) {
		// First and last 64 bytes; they overlap when size < 128.
		avval2 = vld1q_u8_x2(data);
		vval = avval2.val[0];
		vval2 = avval2.val[1];
		avval2 = vld1q_u8_x2(data + 32);
		vval3 = avval2.val[0];
		vval4 = avval2.val[1];
		avval2 = vld1q_u8_x2(data + (size - 64));
		vval5 = avval2.val[0];
		vval6 = avval2.val[1];
		avval2 = vld1q_u8_x2(data + (size - 32));
		vval7 = avval2.val[0];
		vval8 = avval2.val[1];

		vval = aeshash_round(vval, vseed);
		vval2 = aeshash_round(vval2, vseed2);
		vval3 = aeshash_round(vval3, vseed3);
		vval4 = aeshash_round(vval4, vseed4);
		vval5 = aeshash_round(vval5, vseed5);
		vval6 = aeshash_round(vval6, vseed6);
		vval7 = aeshash_round(vval7, vseed7);
		vval8 = aeshash_round(vval8, vseed8);

		vval = aeshash_round(vval, vseed);
		vval2 = aeshash_round(vval2, vseed2);
		vval3 = aeshash_round(vval3, vseed3);
		vval4 = aeshash_round(vval4, vseed4);
		vval5 = aeshash_round(vval5, vseed5);
		vval6 = aeshash_round(vval6, vseed6);
		vval7 = aeshash_round(vval7, vseed7);
		vval8 = aeshash_round(vval8, vseed8);

		vval = vaeseq_u8(vval, vseed);
		vval2 = vaeseq_u8(vval2, vseed2);
		vval3 = vaeseq_u8(vval3, vseed3);
		vval4 = vaeseq_u8(vval4, vseed4);
		vval5 = vaeseq_u8(vval5, vseed5);
		vval6 = vaeseq_u8(vval6, vseed6);
		vval7 = vaeseq_u8(vval7, vseed7);
		vval8 = vaeseq_u8(vval8, vseed8);

		vval ^= vval5;
		vval2 ^= vval6;
		vval3 ^= vval7;
		vval4 ^= vval8;
		vval ^= vval3;
		vval2 ^= vval4;
		vval ^= vval2;
		return vreinterpretq_u64_u8(vval)[0];
	}

	// size > 128.  Start the eight running values from the last
	// 128 bytes of the input.
	avval2 = vld1q_u8_x2(data + (size - 128));
	vval = avval2.val[0];
	vval2 = avval2.val[1];
	avval2 = vld1q_u8_x2(data + (size - 96));
	vval3 = avval2.val[0];
	vval4 = avval2.val[1];
	avval2 = vld1q_u8_x2(data + (size - 64));
	vval5 = avval2.val[0];
	vval6 = avval2.val[1];
	avval2 = vld1q_u8_x2(data + (size - 32));
	vval7 = avval2.val[0];
	vval8 = avval2.val[1];

	// The first loop iteration mixes with the seeds; later iterations
	// mix with the 128-byte block loaded by the previous iteration.
	vvalLoop = vseed;
	vvalLoop2 = vseed2;
	vvalLoop3 = vseed3;
	vvalLoop4 = vseed4;
	vvalLoop5 = vseed5;
	vvalLoop6 = vseed6;
	vvalLoop7 = vseed7;
	vvalLoop8 = vseed8;

	// Number of 128-byte blocks read from the front of the input.
	// size > 128 here, so blocks >= 1 and 128 * blocks < size.
	blocks = (size - 1) >> 7;

	do {
		vval = aeshash_round(vval, vvalLoop);
		vval2 = aeshash_round(vval2, vvalLoop2);
		vval3 = aeshash_round(vval3, vvalLoop3);
		vval4 = aeshash_round(vval4, vvalLoop4);
		vval5 = aeshash_round(vval5, vvalLoop5);
		vval6 = aeshash_round(vval6, vvalLoop6);
		vval7 = aeshash_round(vval7, vvalLoop7);
		vval8 = aeshash_round(vval8, vvalLoop8);

		// Load the next 128-byte block.
		avval2 = vld1q_u8_x2(data);
		vvalLoop = avval2.val[0];
		vvalLoop2 = avval2.val[1];
		avval2 = vld1q_u8_x2(data + 32);
		vvalLoop3 = avval2.val[0];
		vvalLoop4 = avval2.val[1];
		avval2 = vld1q_u8_x2(data + 64);
		vvalLoop5 = avval2.val[0];
		vvalLoop6 = avval2.val[1];
		avval2 = vld1q_u8_x2(data + 96);
		vvalLoop7 = avval2.val[0];
		vvalLoop8 = avval2.val[1];
		data += 128;

		vval = aeshash_round(vval, vvalLoop);
		vval2 = aeshash_round(vval2, vvalLoop2);
		vval3 = aeshash_round(vval3, vvalLoop3);
		vval4 = aeshash_round(vval4, vvalLoop4);
		vval5 = aeshash_round(vval5, vvalLoop5);
		vval6 = aeshash_round(vval6, vvalLoop6);
		vval7 = aeshash_round(vval7, vvalLoop7);
		vval8 = aeshash_round(vval8, vvalLoop8);
	} while (--blocks > 0);

	// Three closing steps against the last block loaded: two full
	// rounds followed by a final AESE without AESMC.
	vval = aeshash_round(vval, vvalLoop);
	vval2 = aeshash_round(vval2, vvalLoop2);
	vval3 = aeshash_round(vval3, vvalLoop3);
	vval4 = aeshash_round(vval4, vvalLoop4);
	vval5 = aeshash_round(vval5, vvalLoop5);
	vval6 = aeshash_round(vval6, vvalLoop6);
	vval7 = aeshash_round(vval7, vvalLoop7);
	vval8 = aeshash_round(vval8, vvalLoop8);

	vval = aeshash_round(vval, vvalLoop);
	vval2 = aeshash_round(vval2, vvalLoop2);
	vval3 = aeshash_round(vval3, vvalLoop3);
	vval4 = aeshash_round(vval4, vvalLoop4);
	vval5 = aeshash_round(vval5, vvalLoop5);
	vval6 = aeshash_round(vval6, vvalLoop6);
	vval7 = aeshash_round(vval7, vvalLoop7);
	vval8 = aeshash_round(vval8, vvalLoop8);

	vval = vaeseq_u8(vval, vvalLoop);
	vval2 = vaeseq_u8(vval2, vvalLoop2);
	vval3 = vaeseq_u8(vval3, vvalLoop3);
	vval4 = vaeseq_u8(vval4, vvalLoop4);
	vval5 = vaeseq_u8(vval5, vvalLoop5);
	vval6 = vaeseq_u8(vval6, vvalLoop6);
	vval7 = vaeseq_u8(vval7, vvalLoop7);
	vval8 = vaeseq_u8(vval8, vvalLoop8);

	// Fold the eight lanes down to one.
	vval ^= vval5;
	vval2 ^= vval6;
	vval3 ^= vval7;
	vval4 ^= vval8;
	vval ^= vval3;
	vval2 ^= vval4;
	vval ^= vval2;
	return vreinterpretq_u64_u8(vval)[0];
}