in renderscript-toolkit/src/main/cpp/Resize.cpp [421:491]
/**
 * Produces the output pixels [xstart, xend) of output row currentY for a
 * two-channel (uchar2) image.
 *
 * The output row is mapped to a fractional source row, the four neighbouring
 * source rows are selected (clamped to the image), and each output pixel is
 * then computed either by the ARM SIMD kernel or by OneBiCubic().
 *
 * @param outPtr   Destination for the first produced pixel; written as uchar2.
 * @param xstart   First output column to produce (inclusive).
 * @param xend     End of the output column range (exclusive).
 * @param currentY Output row being produced.
 */
void ResizeTask::kernelU2(uchar* outPtr, uint32_t xstart, uint32_t xend, uint32_t currentY) {
    const uchar* pin = mIn;
    const int srcHeight = mInputSizeY;
    const int srcWidth = mInputSizeX;
    const size_t stride = mInputSizeX * mVectorSize;

    // Map the centre of the output row to a source-row coordinate.
    // The AVX2 variant evaluates the same expression as a fused
    // multiply-subtract and must read the same member (mScaleY) as the
    // portable variant.
#if defined(ARCH_X86_HAVE_AVX2)
    float yf = _mm_cvtss_f32(
        _mm_fmsub_ss(_mm_set1_ps(currentY + 0.5f), _mm_set1_ps(mScaleY), _mm_set1_ps(0.5f)));
#else
    float yf = (currentY + 0.5f) * mScaleY - 0.5f;
#endif

    // First of the four source rows; yf is then reduced to its fractional part.
    const int starty = static_cast<int>(floor(yf - 1));
    yf = yf - floor(yf);

    // Clamp the four row indices: the upper two against row 0, the lower two
    // against the last row.
    const int maxy = srcHeight - 1;
    const int ys0 = std::max(0, starty + 0);
    const int ys1 = std::max(0, starty + 1);
    const int ys2 = std::min(maxy, starty + 2);
    const int ys3 = std::min(maxy, starty + 3);

    const uchar2* yp0 = reinterpret_cast<const uchar2*>(pin + stride * ys0);
    const uchar2* yp1 = reinterpret_cast<const uchar2*>(pin + stride * ys1);
    const uchar2* yp2 = reinterpret_cast<const uchar2*>(pin + stride * ys2);
    const uchar2* yp3 = reinterpret_cast<const uchar2*>(pin + stride * ys3);

    uchar2* out = reinterpret_cast<uchar2*>(outPtr);
    uint32_t x1 = xstart;
    uint32_t x2 = xend;

#if defined(ARCH_ARM_USE_INTRINSICS)
    // NOTE(review): the mScaleX < 4.0f limit presumably reflects a constraint
    // of the assembly kernel - confirm against rsdIntrinsicResizeB2_K.
    if (mUsesSimd && x2 > x1 && mScaleX < 4.0f) {
        // Source x position and per-pixel increment in 16.16 fixed point.
        float xf = (x1 + 0.5f) * mScaleX - 0.5f;
        long xf16 = rint(xf * 0x10000);
        uint32_t xinc16 = rint(mScaleX * 0x10000);

        // xoff is the leftmost source column of the window; xclip is how many
        // columns it lies left of column 0 (zero when xoff is non-negative).
        int xoff = (xf16 >> 16) - 1;
        int xclip = std::max(0, xoff) - xoff;
        int len = x2 - x1;

        int32_t yr[4];
        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
        mkYCoeff(yr, yf);

        // Shift the window start so the row pointers never point before column 0.
        xoff += xclip;

        rsdIntrinsicResizeB2_K(
                out, len,
                xf16 & 0xffff, xinc16,
                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
                xclip, srcWidth - xoff + xclip,
                osc_ctl, yr);

        // The SIMD kernel handled the whole range; the loop below is skipped.
        out += len;
        x1 += len;
    }
#endif

    // Portable path: one output pixel per iteration.
    while (x1 < x2) {
#if defined(ARCH_X86_HAVE_AVX2)
        float xf = _mm_cvtss_f32(
            _mm_fmsub_ss(_mm_set1_ps(x1 + 0.5f), _mm_set1_ps(mScaleX), _mm_set1_ps(0.5f)));
#else
        float xf = (x1 + 0.5f) * mScaleX - 0.5f;
#endif
        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
        out++;
        x1++;
    }
}