void KeccakP1600times4_AddLanesAll()

in src/sha3/keccak4x/KeccakP-1600-times4-SIMD256.c [123:166]


void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
{
    V256 *stateAsLanes = (V256 *)states;
    unsigned int i;
    const UINT64 *curData0 = (const UINT64 *)data;
    const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes);
    const UINT64 *curData2 = (const UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes);
    const UINT64 *curData3 = (const UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes);
    V256    lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23;

    #define Xor_In( argIndex )  XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex]))

    #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
                                lanes1 = LOAD256u( curData1[argIndex]),\
                                lanes2 = LOAD256u( curData2[argIndex]),\
                                lanes3 = LOAD256u( curData3[argIndex]),\
                                INTLEAVE(),\
                                XOReq256( stateAsLanes[argIndex+0], lanes0 ),\
                                XOReq256( stateAsLanes[argIndex+1], lanes1 ),\
                                XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
                                XOReq256( stateAsLanes[argIndex+3], lanes3 )

    if ( laneCount >= 16 )  {
        Xor_In4( 0 );
        Xor_In4( 4 );
        Xor_In4( 8 );
        Xor_In4( 12 );
        if ( laneCount >= 20 )  {
            Xor_In4( 16 );
            for(i=20; i<laneCount; i++)
                Xor_In( i );
        }
        else {
            for(i=16; i<laneCount; i++)
                Xor_In( i );
        }
    }
    else {
        for(i=0; i<laneCount; i++)
            Xor_In( i );
    }
    #undef  Xor_In
    #undef  Xor_In4
}