in src/sha3/keccak4x/KeccakP-1600-times4-SIMD256.c [291:339]
void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
{
UINT64 *curData0 = (UINT64 *)data;
UINT64 *curData1 = (UINT64 *)(data+laneOffset*1*SnP_laneLengthInBytes);
UINT64 *curData2 = (UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes);
UINT64 *curData3 = (UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes);
const V256 *stateAsLanes = (const V256 *)states;
const UINT64 *stateAsLanes64 = (const UINT64*)states;
V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;
unsigned int i;
#define Extr( argIndex ) curData0[argIndex] = stateAsLanes64[4*(argIndex)], \
curData1[argIndex] = stateAsLanes64[4*(argIndex)+1], \
curData2[argIndex] = stateAsLanes64[4*(argIndex)+2], \
curData3[argIndex] = stateAsLanes64[4*(argIndex)+3]
#define Extr4( argIndex ) lanes0 = LOAD256( stateAsLanes[argIndex+0] ), \
lanes1 = LOAD256( stateAsLanes[argIndex+1] ), \
lanes2 = LOAD256( stateAsLanes[argIndex+2] ), \
lanes3 = LOAD256( stateAsLanes[argIndex+3] ), \
UNINTLEAVE(), \
STORE256u( curData0[argIndex], lanes0 ), \
STORE256u( curData1[argIndex], lanes1 ), \
STORE256u( curData2[argIndex], lanes2 ), \
STORE256u( curData3[argIndex], lanes3 )
if ( laneCount >= 16 ) {
Extr4( 0 );
Extr4( 4 );
Extr4( 8 );
Extr4( 12 );
if ( laneCount >= 20 ) {
Extr4( 16 );
for(i=20; i<laneCount; i++)
Extr( i );
}
else {
for(i=16; i<laneCount; i++)
Extr( i );
}
}
else {
for(i=0; i<laneCount; i++)
Extr( i );
}
#undef Extr
#undef Extr4
}