void ColorMatrixTask::kernel()

in renderscript-toolkit/src/main/cpp/ColorMatrix.cpp [906:957]


/**
 * Runs the color matrix over the elements in [xstart, xend).
 *
 * When SIMD is enabled, as much of the span as possible is handed to an
 * accelerated routine first; whatever is left over is then processed one
 * element per call by One().
 *
 * @param out    Destination of the first element; advanced by mOutstep per element.
 * @param in     Source of the first element; advanced by mInstep per element.
 * @param xstart Index of the first element to process.
 * @param xend   Index one past the last element to process. Nothing happens
 *               unless xend > xstart.
 */
void ColorMatrixTask::kernel(uchar *out, uchar *in, uint32_t xstart, uint32_t xend) {
    // Empty (or inverted) span: nothing to process.
    if (xend <= xstart) {
        return;
    }

    const uint32_t inVecSize = mLastKey.u.inVecSize;
    const uint32_t outVecSize = mLastKey.u.outVecSize;
    const bool inIsFloat = (mLastKey.u.inType != 0);
    const bool outIsFloat = (mLastKey.u.outType != 0);

    uint32_t pos = xstart;

    if (mUsesSimd) {
        const int32_t span = xend - xstart;

        if ((mOptKernel != nullptr) && (span >= 4)) {
            // The optimized kernel consumes groups of 4 and needs at least
            // one complete group; it is given the number of groups.
            mOptKernel(out, in, mIp, span >> 2);

            // Step past everything the optimized kernel covered so that the
            // generic loop below only sees the leftover (at most 3) elements.
            const int32_t covered = span & ~3;
            pos += covered;
            out += mOutstep * covered;
            in += mInstep * covered;
        }
#if defined(ARCH_ARM64_USE_INTRINSICS)
        else if (mLastKey.u.inType != RS_TYPE_FLOAT_32 &&
                 mLastKey.u.outType != RS_TYPE_FLOAT_32) {
            // Integer-only data: the intrinsic handles the whole span.
            rsdIntrinsicColorMatrix_int_K(out, in, span, &mFnTab, mIp, mIpa);
            pos += span;
            out += mOutstep * span;
            in += mInstep * span;
        }
        // When either side is float, no intrinsic is used here: the float
        // variant (rsdIntrinsicColorMatrix_float_K) currently generates
        // off by one errors, so those elements fall through to the generic
        // loop below.
#endif
    }

    // Generic path: finish whatever has not been handled above.
    for (; pos != xend; ++pos) {
        One(out, in, mTmpFp, mTmpFpa, inVecSize, outVecSize, inIsFloat, outIsFloat);
        out += mOutstep;
        in += mInstep;
    }
}