void KeccakP1600times4_ExtractLanesAll()

in src/sha3/keccak4x/KeccakP-1600-times4-SIMD256.c [291:339]


void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
{
    UINT64 *curData0 = (UINT64 *)data;
    UINT64 *curData1 = (UINT64 *)(data+laneOffset*1*SnP_laneLengthInBytes);
    UINT64 *curData2 = (UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes);
    UINT64 *curData3 = (UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes);

    const V256 *stateAsLanes = (const V256 *)states;
    const UINT64 *stateAsLanes64 = (const UINT64*)states;
    V256    lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;
    unsigned int i;

    #define Extr( argIndex )    curData0[argIndex] = stateAsLanes64[4*(argIndex)],      \
                                curData1[argIndex] = stateAsLanes64[4*(argIndex)+1],    \
                                curData2[argIndex] = stateAsLanes64[4*(argIndex)+2],    \
                                curData3[argIndex] = stateAsLanes64[4*(argIndex)+3]

    #define Extr4( argIndex )   lanes0 = LOAD256( stateAsLanes[argIndex+0] ),           \
                                lanes1 = LOAD256( stateAsLanes[argIndex+1] ),           \
                                lanes2 = LOAD256( stateAsLanes[argIndex+2] ),           \
                                lanes3 = LOAD256( stateAsLanes[argIndex+3] ),           \
                                UNINTLEAVE(),                                           \
                                STORE256u( curData0[argIndex], lanes0 ),                \
                                STORE256u( curData1[argIndex], lanes1 ),                \
                                STORE256u( curData2[argIndex], lanes2 ),                \
                                STORE256u( curData3[argIndex], lanes3 )

    if ( laneCount >= 16 )  {
        Extr4( 0 );
        Extr4( 4 );
        Extr4( 8 );
        Extr4( 12 );
        if ( laneCount >= 20 )  {
            Extr4( 16 );
            for(i=20; i<laneCount; i++)
                Extr( i );
        }
        else {
            for(i=16; i<laneCount; i++)
                Extr( i );
        }
    }
    else {
        for(i=0; i<laneCount; i++)
            Extr( i );
    }
    #undef  Extr
    #undef  Extr4
}