void ResizeTask::kernelU2()

in renderscript-toolkit/src/main/cpp/Resize.cpp [421:491]


/**
 * Produces the output pixels [xstart, xend) of output row `currentY` for a
 * two-channel (uchar2) image using bicubic sampling of the input image `mIn`.
 *
 * @param outPtr   Destination for the first pixel produced (the one for
 *                 `xstart`); pixels are written consecutively as uchar2.
 * @param xstart   First output column to produce (inclusive).
 * @param xend     One past the last output column to produce (exclusive).
 * @param currentY Output row being produced.
 *
 * Output coordinates are mapped to source coordinates with
 * `(dst + 0.5) * scale - 0.5`, in both axes.
 */
void ResizeTask::kernelU2(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY) {
    const uchar *pin = mIn;
    const int srcHeight = mInputSizeY;
    const int srcWidth = mInputSizeX;
    // Byte distance between consecutive input rows (rows are addressed through a uchar pointer).
    const size_t stride = mInputSizeX * mVectorSize;

    // Fractional source row that corresponds to this output row.
#if defined(ARCH_X86_HAVE_AVX2)
    // Fused multiply-subtract form of the scalar expression in the #else branch. It must read
    // the same member (mScaleY) as that branch so both build configurations agree.
    float yf = _mm_cvtss_f32(
            _mm_fmsub_ss(_mm_set1_ps(currentY + 0.5f), _mm_set1_ps(mScaleY), _mm_set1_ps(0.5f)));
#else
    float yf = (currentY + 0.5f) * mScaleY - 0.5f;
#endif

    // The four source rows used are starty .. starty + 3; yf is reduced to its fractional part.
    int starty = static_cast<int>(floor(yf - 1));
    yf = yf - floor(yf);
    int maxy = srcHeight - 1;
    // The two upper rows are clamped against the top edge, the two lower rows against the bottom.
    int ys0 = std::max(0, starty + 0);
    int ys1 = std::max(0, starty + 1);
    int ys2 = std::min(maxy, starty + 2);
    int ys3 = std::min(maxy, starty + 3);

    const uchar2 *yp0 = reinterpret_cast<const uchar2 *>(pin + stride * ys0);
    const uchar2 *yp1 = reinterpret_cast<const uchar2 *>(pin + stride * ys1);
    const uchar2 *yp2 = reinterpret_cast<const uchar2 *>(pin + stride * ys2);
    const uchar2 *yp3 = reinterpret_cast<const uchar2 *>(pin + stride * ys3);

    uchar2 *out = reinterpret_cast<uchar2 *>(outPtr);
    uint32_t x1 = xstart;
    uint32_t x2 = xend;

#if defined(ARCH_ARM_USE_INTRINSICS)
    // SIMD path: hands the whole span to rsdIntrinsicResizeB2_K.
    // NOTE(review): the reason for the mScaleX < 4.0f limit is not visible here — presumably a
    // constraint of the SIMD kernel; confirm against its implementation.
    if (mUsesSimd && x2 > x1 && mScaleX < 4.0f) {
        float xf = (x1 + 0.5f) * mScaleX - 0.5f;
        // Source x position and per-pixel increment, scaled by 0x10000 (16 fractional bits).
        long xf16 = rint(xf * 0x10000);
        uint32_t xinc16 = rint(mScaleX * 0x10000);

        // Integer source column one to the left of the sample position, and how far it lies
        // left of column 0 (0 when it is already inside the row).
        int xoff = (xf16 >> 16) - 1;
        int xclip = std::max(0, xoff) - xoff;
        int len = x2 - x1;

        int32_t yr[4];
        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
        // Presumably fills yr with the four vertical weights for fraction yf — confirm in mkYCoeff.
        mkYCoeff(yr, yf);

        // Move the starting column back inside the row so the row pointers stay in bounds.
        xoff += xclip;

        rsdIntrinsicResizeB2_K(
                out, len,
                xf16 & 0xffff, xinc16,
                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
                xclip, srcWidth - xoff + xclip,
                osc_ctl, yr);
        // The entire span was handled; advancing x1 makes the scalar loop below a no-op.
        out += len;
        x1 += len;
    }
#endif

    // Scalar path: one bicubic sample per remaining output pixel.
    while (x1 < x2) {

#if defined(ARCH_X86_HAVE_AVX2)
        // Same mapping as the #else branch, reading the same member (mScaleX).
        float xf = _mm_cvtss_f32(_mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f), _mm_set1_ps(mScaleX),
                                              _mm_set1_ps(0.5f)));
#else
        float xf = (x1 + 0.5f) * mScaleX - 0.5f;
#endif
        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
        out++;
        x1++;
    }
}