void SRMaskBytesSIMD()

in SocketRocket/Internal/Utilities/SRSIMDHelpers.m [41:73]


void SRMaskBytesSIMD(uint8_t *bytes, size_t length, uint8_t *maskKey) {
    size_t alignmentBytes = _Alignof(uint8x32_t) - ((uintptr_t)bytes % _Alignof(uint8x32_t));
    if (alignmentBytes == _Alignof(uint8x32_t)) {
        alignmentBytes = 0;
    }

    // If the number of bytes that can be processed after aligning is
    // less than the number of bytes we can put into a vector,
    // then there's no work to do with SIMD, just call the manual version.
    if (alignmentBytes > length || (length - alignmentBytes) < sizeof(uint8x32_t)) {
        SRMaskBytesManual(bytes, length, maskKey);
        return;
    }

    size_t vectorLength = (length - alignmentBytes) / sizeof(uint8x32_t);
    size_t manualStartOffset = alignmentBytes + (vectorLength * sizeof(uint8x32_t));
    size_t manualLength = length - manualStartOffset;

    uint8x32_t *vector = (uint8x32_t *)(bytes + alignmentBytes);
    uint8x32_t maskVector = { };

    memset_pattern4(&maskVector, maskKey, sizeof(uint8x32_t));
    maskVector = SRShiftVector(maskVector, alignmentBytes);

    SRMaskBytesManual(bytes, alignmentBytes, maskKey);

    for (size_t vectorIndex = 0; vectorIndex < vectorLength; vectorIndex++) {
        vector[vectorIndex] = vector[vectorIndex] ^ maskVector;
    }

    // Use the shifted mask for the final manual part.
    SRMaskBytesManual(bytes + manualStartOffset, manualLength, (uint8_t *) &maskVector);
}