void BlendTask::blend()

in renderscript-toolkit/src/main/cpp/Blend.cpp [83:345]


/**
 * Blends `length` pixels of `in` into `out`, in place, according to `mode`.
 *
 * Every pixel is one uchar4 whose x, y, z, w components are treated as R, G, B, A
 * (w is the weight/alpha factor in the formulas below). Products of two 8-bit
 * components are normalised with `>> 8`, i.e. divided by 256.
 *
 * Execution strategy:
 *  - With ARCH_ARM_USE_INTRINSICS and mUsesSimd set, the whole run is first offered to
 *    rsdIntrinsicBlend_K(). A non-negative return value means it handled the request and
 *    this function returns; otherwise a warning is logged and the scalar code below runs.
 *  - With ARCH_X86_HAVE_SSSE3 and mUsesSimd set, most modes hand the leading whole groups
 *    of 8 pixels to a mode-specific rsdIntrinsicBlend*_K() kernel, and the scalar loop
 *    finishes the remaining (< 8) pixels. The guard is `(x1 + 8) < x2`, so a run of
 *    exactly 8 pixels (or fewer) is processed entirely by the scalar loop.
 *  - Otherwise each mode is computed by the scalar loop alone.
 *
 * @param mode   Blending mode to apply. An unhandled value is logged and trips assert(false).
 * @param in     Source pixels; read only. Must hold at least `length` elements.
 * @param out    Destination pixels; read (for most modes) and overwritten with the result.
 * @param length Number of pixels to process.
 */
void BlendTask::blend(RenderScriptToolkit::BlendingMode mode, const uchar4* in, uchar4* out,
                      uint32_t length) {
    // [x1, x2) is the range of pixel indices still to be processed; `in`/`out` are advanced
    // in lock-step with x1.
    uint32_t x1 = 0;
    uint32_t x2 = length;

#if defined(ARCH_ARM_USE_INTRINSICS)
    if (mUsesSimd) {
        if (rsdIntrinsicBlend_K(out, in, static_cast<int>(mode), x1, x2) >= 0) {
            return;
        } else {
            // The scoped enum is cast explicitly: it is not promoted to int when passed
            // through the variadic log call, so "%d" needs a real int argument.
            ALOGW("Intrinsic Blend failed to use SIMD for %d", static_cast<int>(mode));
        }
    }
#endif
    switch (mode) {
    // CLEAR: out = 0 (the source is not read).
    case RenderScriptToolkit::BlendingMode::CLEAR:
        for (;x1 < x2; x1++, out++) {
            *out = 0;
        }
        break;
    // SRC: out = in.
    case RenderScriptToolkit::BlendingMode::SRC:
        for (;x1 < x2; x1++, out++, in++) {
          *out = *in;
        }
        break;
    //RenderScriptToolkit::BlendingMode::DST is a NOP
    case RenderScriptToolkit::BlendingMode::DST:
        break;
    // SRC_OVER: out = in + ((out * (255 - in.w)) >> 8).
    case RenderScriptToolkit::BlendingMode::SRC_OVER:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                // len counts whole groups of 8 pixels; the cursors then skip len * 8 pixels.
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendSrcOver_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            ushort4 in_s = convert<ushort4>(*in);
            ushort4 out_s = convert<ushort4>(*out);
            in_s = in_s + ((out_s * (ushort4)(255 - in_s.w)) >> (ushort4)8);
            *out = convertClipped(in_s);
        }
        break;
    // DST_OVER: out = out + ((in * (255 - out.w)) >> 8).
    case RenderScriptToolkit::BlendingMode::DST_OVER:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendDstOver_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            ushort4 in_s = convert<ushort4>(*in);
            ushort4 out_s = convert<ushort4>(*out);
            in_s = out_s + ((in_s * (ushort4)(255 - out_s.w)) >> (ushort4)8);
            *out = convertClipped(in_s);
        }
        break;
    // SRC_IN: out = (in * out.w) >> 8.
    case RenderScriptToolkit::BlendingMode::SRC_IN:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendSrcIn_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            ushort4 in_s = convert<ushort4>(*in);
            in_s = (in_s * out->w) >> (ushort4)8;
            *out = convert<uchar4>(in_s);
        }
        break;
    // DST_IN: out = (out * in.w) >> 8.
    case RenderScriptToolkit::BlendingMode::DST_IN:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendDstIn_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            ushort4 out_s = convert<ushort4>(*out);
            out_s = (out_s * in->w) >> (ushort4)8;
            *out = convert<uchar4>(out_s);
        }
        break;
    // SRC_OUT: out = (in * (255 - out.w)) >> 8.
    case RenderScriptToolkit::BlendingMode::SRC_OUT:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendSrcOut_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            ushort4 in_s = convert<ushort4>(*in);
            in_s = (in_s * (ushort4)(255 - out->w)) >> (ushort4)8;
            *out = convert<uchar4>(in_s);
        }
        break;
    // DST_OUT: out = (out * (255 - in.w)) >> 8.
    case RenderScriptToolkit::BlendingMode::DST_OUT:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendDstOut_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            ushort4 out_s = convert<ushort4>(*out);
            out_s = (out_s * (ushort4)(255 - in->w)) >> (ushort4)8;
            *out = convert<uchar4>(out_s);
        }
        break;
    // SRC_ATOP: out.xyz = (in.xyz * out.w + out.xyz * (255 - in.w)) >> 8; out.w is kept.
    case RenderScriptToolkit::BlendingMode::SRC_ATOP:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendSrcAtop_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            // The max value the operation could produce before the shift
            // is 255 * 255 + 255 * (255 - 0) = 130050, or 0x1FC02.
            // That value does not fit in a ushort, so we use uint.
            uint4 in_s = convert<uint4>(*in);
            uint4 out_s = convert<uint4>(*out);
            out_s.xyz = ((in_s.xyz * out_s.w) +
              (out_s.xyz * ((uint3)255 - (uint3)in_s.w))) >> (uint3)8;
            *out = convertClipped(out_s);
        }
        break;
    // DST_ATOP: out.xyz = (out.xyz * in.w + in.xyz * (255 - out.w)) >> 8; out.w = in.w.
    case RenderScriptToolkit::BlendingMode::DST_ATOP:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendDstAtop_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            // uint is used for the same range reason as in SRC_ATOP: the sum of the two
            // products can exceed what a ushort holds.
            uint4 in_s = convert<uint4>(*in);
            uint4 out_s = convert<uint4>(*out);
            out_s.xyz = ((out_s.xyz * in_s.w) +
              (in_s.xyz * ((uint3)255 - (uint3)out_s.w))) >> (uint3)8;
            out_s.w = in_s.w;
            *out = convertClipped(out_s);
        }
        break;
    // XOR: out = in ^ out, applied with the uchar4 `^` operator.
    case RenderScriptToolkit::BlendingMode::XOR:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendXor_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            *out = *in ^ *out;
        }
        break;
    // MULTIPLY: out = (in * out) >> 8, per component.
    case RenderScriptToolkit::BlendingMode::MULTIPLY:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if ((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendMultiply_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
          *out = convert<uchar4>((convert<ushort4>(*in) * convert<ushort4>(*out))
                                >> (ushort4)8);
        }
        break;
    // ADD: out = min(out + in, 255), per component.
    case RenderScriptToolkit::BlendingMode::ADD:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendAdd_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            // Widen to 32 bits so the sum cannot wrap before it is capped at 255.
            uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
            out->x = (oR + iR) > 255 ? 255 : oR + iR;
            out->y = (oG + iG) > 255 ? 255 : oG + iG;
            out->z = (oB + iB) > 255 ? 255 : oB + iB;
            out->w = (oA + iA) > 255 ? 255 : oA + iA;
        }
        break;
    // SUBTRACT: out = max(out - in, 0), per component.
    case RenderScriptToolkit::BlendingMode::SUBTRACT:
#if defined(ARCH_X86_HAVE_SSSE3)
        if (mUsesSimd) {
            if((x1 + 8) < x2) {
                uint32_t len = (x2 - x1) >> 3;
                rsdIntrinsicBlendSub_K(out, in, len);
                x1 += len << 3;
                out += len << 3;
                in += len << 3;
            }
        }
#endif
        for (;x1 < x2; x1++, out++, in++) {
            // Signed 32-bit operands so a negative difference is detectable and floored at 0.
            int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
            out->x = (oR - iR) < 0 ? 0 : oR - iR;
            out->y = (oG - iG) < 0 ? 0 : oG - iG;
            out->z = (oB - iB) < 0 ? 0 : oB - iB;
            out->w = (oA - iA) < 0 ? 0 : oA - iA;
        }
        break;

    default:
        // Explicit cast for the same reason as the warning above: "%d" expects an int.
        ALOGE("Called unimplemented value %d", static_cast<int>(mode));
        assert(false);
    }
}