inline XMVECTOR XM_CALLCONV XMVectorExp2(FXMVECTOR V) noexcept

in Inc/DirectXMathVector.inl [3232:3365]
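
Computes two raised to the power of each component of V. All intrinsic paths share one scheme: split V into an integer part and a fraction, estimate the fractional contribution with a polynomial, construct the integer power of two directly in the float exponent field, then patch up the special cases (NaN, overflow to +infinity, underflow to subnormals or +0) with bitwise selection.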


inline XMVECTOR XM_CALLCONV XMVectorExp2(FXMVECTOR V) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 Result = { { {
            exp2f(V.vector4_f32[0]),
            exp2f(V.vector4_f32[1]),
            exp2f(V.vector4_f32[2]),
            exp2f(V.vector4_f32[3])
        } } };
    return Result.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
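    // Range reduction: split V into an integer part (itrunc) and a
    // fraction y = V - trunc(V) with |y| < 1, so 2^V = 2^itrunc * 2^y.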
    int32x4_t itrunc = vcvtq_s32_f32(V);
    float32x4_t ftrunc = vcvtq_f32_s32(itrunc);
    float32x4_t y = vsubq_f32(V, ftrunc);

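    // Horner evaluation of a degree-7 polynomial in y that estimates
    // 2^(-y); dividing by it below yields 2^itrunc * 2^y = 2^V.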
    float32x4_t poly = vmlaq_f32(g_XMExpEst6, g_XMExpEst7, y);
    poly = vmlaq_f32(g_XMExpEst5, poly, y);
    poly = vmlaq_f32(g_XMExpEst4, poly, y);
    poly = vmlaq_f32(g_XMExpEst3, poly, y);
    poly = vmlaq_f32(g_XMExpEst2, poly, y);
    poly = vmlaq_f32(g_XMExpEst1, poly, y);
    poly = vmlaq_f32(g_XMOne, poly, y);

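    // Build 2^itrunc directly in the exponent field: (itrunc + 127) << 23
    // is the IEEE-754 bit pattern of 2^itrunc as a float
    // (g_XMExponentBias is 127).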
    int32x4_t biased = vaddq_s32(itrunc, g_XMExponentBias);
    biased = vshlq_n_s32(biased, 23);
    float32x4_t result0 = XMVectorDivide(vreinterpretq_f32_s32(biased), poly);

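    // Subnormal variant: (itrunc + 253) << 23 is the bit pattern of
    // 2^(itrunc + 126); multiplying by g_XMMinNormal (2^-126) takes the
    // final step in float arithmetic so the result can round into the
    // subnormal range instead of underflowing the exponent field.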
    biased = vaddq_s32(itrunc, g_XM253);
    biased = vshlq_n_s32(biased, 23);
    float32x4_t result1 = XMVectorDivide(vreinterpretq_f32_s32(biased), poly);
    result1 = vmulq_f32(g_XMMinNormal.v, result1);

    // Use selection to handle the cases
    //  if (V is NaN) -> QNaN;
    //  else if (V sign bit set)
    //      if (V > -150)
    //         if (V.exponent < -126) -> result1
    //         else -> result0
    //      else -> +0
    //  else
    //      if (V < 128) -> result0
    //      else -> +inf

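    // The comparisons below treat float bit patterns as signed integers;
    // this matches float ordering for positive values and reverses it for
    // negative ones, which the split on the sign bit accounts for.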
    uint32x4_t comp = vcltq_s32(vreinterpretq_s32_f32(V), g_XMBin128);
    float32x4_t result2 = vbslq_f32(comp, result0, g_XMInfinity);

    comp = vcltq_s32(itrunc, g_XMSubnormalExponent);
    float32x4_t result3 = vbslq_f32(comp, result1, result0);

    comp = vcltq_s32(vreinterpretq_s32_f32(V), g_XMBinNeg150);
    float32x4_t result4 = vbslq_f32(comp, result3, g_XMZero);

    uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(V), g_XMNegativeZero);
    comp = vceqq_u32(sign, g_XMNegativeZero);
    float32x4_t result5 = vbslq_f32(comp, result4, result2);

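    // NaN test: a lane is NaN when its exponent bits are all ones (t1)
    // and its mantissa is non-zero (t0 false); such lanes are forced
    // to QNaN.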
    uint32x4_t t0 = vandq_u32(vreinterpretq_u32_f32(V), g_XMQNaNTest);
    uint32x4_t t1 = vandq_u32(vreinterpretq_u32_f32(V), g_XMInfinity);
    t0 = vceqq_u32(t0, g_XMZero);
    t1 = vceqq_u32(t1, g_XMInfinity);
    uint32x4_t isNaN = vbicq_u32(t1, t0);

    float32x4_t vResult = vbslq_f32(isNaN, g_XMQNaN, result5);
    return vResult;
#elif defined(_XM_SVML_INTRINSICS_)
    XMVECTOR Result = _mm_exp2_ps(V);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
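    // Same algorithm as the NEON path: split V into integer and
    // fractional parts, estimate 2^(-y) with a polynomial, build the
    // integer power of two in the exponent field, then select among
    // the special cases with bitwise masks.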
    __m128i itrunc = _mm_cvttps_epi32(V);
    __m128 ftrunc = _mm_cvtepi32_ps(itrunc);
    __m128 y = _mm_sub_ps(V, ftrunc);

    __m128 poly = XM_FMADD_PS(g_XMExpEst7, y, g_XMExpEst6);
    poly = XM_FMADD_PS(poly, y, g_XMExpEst5);
    poly = XM_FMADD_PS(poly, y, g_XMExpEst4);
    poly = XM_FMADD_PS(poly, y, g_XMExpEst3);
    poly = XM_FMADD_PS(poly, y, g_XMExpEst2);
    poly = XM_FMADD_PS(poly, y, g_XMExpEst1);
    poly = XM_FMADD_PS(poly, y, g_XMOne);

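    // (itrunc + 127) << 23 is the bit pattern of 2^itrunc; the
    // (itrunc + 253) << 23 variant below, rescaled by g_XMMinNormal
    // (2^-126), covers results that fall into the subnormal range.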
    __m128i biased = _mm_add_epi32(itrunc, g_XMExponentBias);
    biased = _mm_slli_epi32(biased, 23);
    __m128 result0 = _mm_div_ps(_mm_castsi128_ps(biased), poly);

    biased = _mm_add_epi32(itrunc, g_XM253);
    biased = _mm_slli_epi32(biased, 23);
    __m128 result1 = _mm_div_ps(_mm_castsi128_ps(biased), poly);
    result1 = _mm_mul_ps(g_XMMinNormal.v, result1);

    // Use selection to handle the cases
    //  if (V is NaN) -> QNaN;
    //  else if (V sign bit set)
    //      if (V > -150)
    //         if (V.exponent < -126) -> result1
    //         else -> result0
    //      else -> +0
    //  else
    //      if (V < 128) -> result0
    //      else -> +inf

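    // SSE2 has no blend instruction, so each selection is the classic
    // (mask & a) | (~mask & b) and/andnot/or sequence.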
    __m128i comp = _mm_cmplt_epi32(_mm_castps_si128(V), g_XMBin128);
    __m128i select0 = _mm_and_si128(comp, _mm_castps_si128(result0));
    __m128i select1 = _mm_andnot_si128(comp, g_XMInfinity);
    __m128i result2 = _mm_or_si128(select0, select1);

    comp = _mm_cmplt_epi32(itrunc, g_XMSubnormalExponent);
    select1 = _mm_and_si128(comp, _mm_castps_si128(result1));
    select0 = _mm_andnot_si128(comp, _mm_castps_si128(result0));
    __m128i result3 = _mm_or_si128(select0, select1);

    comp = _mm_cmplt_epi32(_mm_castps_si128(V), g_XMBinNeg150);
    select0 = _mm_and_si128(comp, result3);
    select1 = _mm_andnot_si128(comp, g_XMZero);
    __m128i result4 = _mm_or_si128(select0, select1);

    __m128i sign = _mm_and_si128(_mm_castps_si128(V), g_XMNegativeZero);
    comp = _mm_cmpeq_epi32(sign, g_XMNegativeZero);
    select0 = _mm_and_si128(comp, result4);
    select1 = _mm_andnot_si128(comp, result2);
    __m128i result5 = _mm_or_si128(select0, select1);

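    // NaN test, as in the NEON path: exponent bits all ones and a
    // non-zero mantissa; those lanes are replaced with QNaN.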
    __m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest);
    __m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity);
    t0 = _mm_cmpeq_epi32(t0, g_XMZero);
    t1 = _mm_cmpeq_epi32(t1, g_XMInfinity);
    __m128i isNaN = _mm_andnot_si128(t0, t1);

    select0 = _mm_and_si128(isNaN, g_XMQNaN);
    select1 = _mm_andnot_si128(isNaN, result5);
    __m128i vResult = _mm_or_si128(select0, select1);

    return _mm_castsi128_ps(vResult);
#endif
}
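
For reference, a minimal usage sketch. The function names below are the public DirectXMath API; the lane values are assumptions chosen to exercise the normal, fractional, and overflow paths above:

#include <DirectXMath.h>

using namespace DirectX;

// Minimal sketch: per-component 2^x via XMVectorExp2.
void Exp2Example()
{
    XMVECTOR v = XMVectorSet(0.0f, 1.5f, -1.0f, 129.0f);
    XMVECTOR r = XMVectorExp2(v);

    XMFLOAT4 out;
    XMStoreFloat4(&out, r);
    // out is approximately { 1.0f, 2.828f, 0.5f, +infinity }:
    // lanes of 128 or more saturate to +infinity per the selection
    // logic above.
}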