in Inc/DirectXMathVector.inl [3232:3365]
// Computes 2^x for each component of V.
// The intrinsic paths split V into integer and fractional parts, build 2^trunc(V)
// directly in the IEEE-754 exponent field, and divide by a polynomial estimate;
// special cases (NaN, +/-infinity, overflow, subnormal/underflow) are resolved
// with branchless masked selects per the decision tree commented inline below.
inline XMVECTOR XM_CALLCONV XMVectorExp2(FXMVECTOR V) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
// Scalar reference path: per-component 2^x via the CRT's exp2f.
XMVECTORF32 Result = { { {
exp2f(V.vector4_f32[0]),
exp2f(V.vector4_f32[1]),
exp2f(V.vector4_f32[2]),
exp2f(V.vector4_f32[3])
} } };
return Result.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
// Split V = itrunc + y, where itrunc = trunc(V) (vcvtq truncates toward zero)
// and y is the fractional remainder (same sign as V).
int32x4_t itrunc = vcvtq_s32_f32(V);
float32x4_t ftrunc = vcvtq_f32_s32(itrunc);
float32x4_t y = vsubq_f32(V, ftrunc);
// Horner evaluation of the ExpEst estimation polynomial in y (degree 7);
// it is used as a divisor below, so 2^itrunc / poly approximates 2^V.
float32x4_t poly = vmlaq_f32(g_XMExpEst6, g_XMExpEst7, y);
poly = vmlaq_f32(g_XMExpEst5, poly, y);
poly = vmlaq_f32(g_XMExpEst4, poly, y);
poly = vmlaq_f32(g_XMExpEst3, poly, y);
poly = vmlaq_f32(g_XMExpEst2, poly, y);
poly = vmlaq_f32(g_XMExpEst1, poly, y);
poly = vmlaq_f32(g_XMOne, poly, y);
// Construct the float 2^itrunc bitwise: add the IEEE-754 exponent bias and
// shift into the exponent field (bit 23 = start of the exponent).
int32x4_t biased = vaddq_s32(itrunc, g_XMExponentBias);
biased = vshlq_n_s32(biased, 23);
float32x4_t result0 = XMVectorDivide(vreinterpretq_f32_s32(biased), poly);
// Alternate scaling for exponents below the normal range: bias with g_XM253
// (presumably a near-maximal biased exponent), then scale back down by
// g_XMMinNormal so the product lands in the subnormal range without the
// intermediate underflowing. TODO(review): confirm g_XM253's value upstream.
biased = vaddq_s32(itrunc, g_XM253);
biased = vshlq_n_s32(biased, 23);
float32x4_t result1 = XMVectorDivide(vreinterpretq_f32_s32(biased), poly);
result1 = vmulq_f32(g_XMMinNormal.v, result1);
// Use selection to handle the cases
// if (V is NaN) -> QNaN;
// else if (V sign bit set)
// if (V > -150)
// if (V.exponent < -126) -> result1
// else -> result0
// else -> +0
// else
// if (V < 128) -> result0
// else -> +inf
// The range tests compare the raw float bits as signed integers
// (g_XMBin128/g_XMBinNeg150 presumably hold the bit patterns of 128.0f
// and -150.0f); note signed-int ordering of float bits is reversed for
// negative values, which the select operand order accounts for.
uint32x4_t comp = vcltq_s32(vreinterpretq_s32_f32(V), g_XMBin128);
float32x4_t result2 = vbslq_f32(comp, result0, g_XMInfinity);
comp = vcltq_s32(itrunc, g_XMSubnormalExponent);
float32x4_t result3 = vbslq_f32(comp, result1, result0);
comp = vcltq_s32(vreinterpretq_s32_f32(V), g_XMBinNeg150);
float32x4_t result4 = vbslq_f32(comp, result3, g_XMZero);
// Pick the negative-input chain (result4) or positive-input chain (result2)
// based on the sign bit of V.
int32x4_t sign = vandq_s32(vreinterpretq_s32_f32(V), g_XMNegativeZero);
comp = vceqq_s32(sign, g_XMNegativeZero);
float32x4_t result5 = vbslq_f32(comp, result4, result2);
// NaN detection: exponent field all ones (V & Inf == Inf) AND mantissa
// nonzero (V & QNaNTest != 0, where g_XMQNaNTest presumably masks the
// mantissa bits). NaN lanes are forced to QNaN.
int32x4_t t0 = vandq_s32(vreinterpretq_s32_f32(V), g_XMQNaNTest);
int32x4_t t1 = vandq_s32(vreinterpretq_s32_f32(V), g_XMInfinity);
t0 = vreinterpretq_s32_u32(vceqq_s32(t0, g_XMZero));
t1 = vreinterpretq_s32_u32(vceqq_s32(t1, g_XMInfinity));
int32x4_t isNaN = vbicq_s32(t1, t0);
float32x4_t vResult = vbslq_f32(vreinterpretq_u32_s32(isNaN), g_XMQNaN, result5);
return vResult;
#elif defined(_XM_SVML_INTRINSICS_)
// Intel SVML provides a vectorized exp2 directly.
XMVECTOR Result = _mm_exp2_ps(V);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// Same algorithm as the NEON path; see the comments there for the math.
// Split V = itrunc + y (cvttps truncates toward zero).
__m128i itrunc = _mm_cvttps_epi32(V);
__m128 ftrunc = _mm_cvtepi32_ps(itrunc);
__m128 y = _mm_sub_ps(V, ftrunc);
// Horner evaluation of the degree-7 ExpEst polynomial in y (divisor below).
__m128 poly = XM_FMADD_PS(g_XMExpEst7, y, g_XMExpEst6);
poly = XM_FMADD_PS(poly, y, g_XMExpEst5);
poly = XM_FMADD_PS(poly, y, g_XMExpEst4);
poly = XM_FMADD_PS(poly, y, g_XMExpEst3);
poly = XM_FMADD_PS(poly, y, g_XMExpEst2);
poly = XM_FMADD_PS(poly, y, g_XMExpEst1);
poly = XM_FMADD_PS(poly, y, g_XMOne);
// Build 2^itrunc bitwise in the exponent field (bias, then shift to bit 23).
__m128i biased = _mm_add_epi32(itrunc, g_XMExponentBias);
biased = _mm_slli_epi32(biased, 23);
__m128 result0 = _mm_div_ps(_mm_castsi128_ps(biased), poly);
// Rescaled variant for subnormal results (see NEON-path comment on g_XM253).
biased = _mm_add_epi32(itrunc, g_XM253);
biased = _mm_slli_epi32(biased, 23);
__m128 result1 = _mm_div_ps(_mm_castsi128_ps(biased), poly);
result1 = _mm_mul_ps(g_XMMinNormal.v, result1);
// Use selection to handle the cases
// if (V is NaN) -> QNaN;
// else if (V sign bit set)
// if (V > -150)
// if (V.exponent < -126) -> result1
// else -> result0
// else -> +0
// else
// if (V < 128) -> result0
// else -> +inf
// SSE2 has no float blend, so each select is built as
// (comp & a) | (~comp & b). Range tests compare raw float bits as
// signed integers, as in the NEON path.
__m128i comp = _mm_cmplt_epi32(_mm_castps_si128(V), g_XMBin128);
__m128i select0 = _mm_and_si128(comp, _mm_castps_si128(result0));
__m128i select1 = _mm_andnot_si128(comp, g_XMInfinity);
__m128i result2 = _mm_or_si128(select0, select1);
comp = _mm_cmplt_epi32(itrunc, g_XMSubnormalExponent);
select1 = _mm_and_si128(comp, _mm_castps_si128(result1));
select0 = _mm_andnot_si128(comp, _mm_castps_si128(result0));
__m128i result3 = _mm_or_si128(select0, select1);
comp = _mm_cmplt_epi32(_mm_castps_si128(V), g_XMBinNeg150);
select0 = _mm_and_si128(comp, result3);
select1 = _mm_andnot_si128(comp, g_XMZero);
__m128i result4 = _mm_or_si128(select0, select1);
// Choose negative-input chain (result4) vs positive-input chain (result2)
// by the sign bit of V.
__m128i sign = _mm_and_si128(_mm_castps_si128(V), g_XMNegativeZero);
comp = _mm_cmpeq_epi32(sign, g_XMNegativeZero);
select0 = _mm_and_si128(comp, result4);
select1 = _mm_andnot_si128(comp, result2);
__m128i result5 = _mm_or_si128(select0, select1);
// NaN detection: exponent all ones AND mantissa nonzero; force those lanes
// to QNaN (see NEON-path comment).
__m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest);
__m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity);
t0 = _mm_cmpeq_epi32(t0, g_XMZero);
t1 = _mm_cmpeq_epi32(t1, g_XMInfinity);
__m128i isNaN = _mm_andnot_si128(t0, t1);
select0 = _mm_and_si128(isNaN, g_XMQNaN);
select1 = _mm_andnot_si128(isNaN, result5);
__m128i vResult = _mm_or_si128(select0, select1);
return _mm_castsi128_ps(vResult);
#endif
}