in common/checksum/sse2neon.h [3961:4011]
// Converts the four single-precision lanes of a to signed 32-bit integers and
// returns them as an __m128i. Except on the __ARM_FEATURE_FRINT path, the
// rounding behaviour is chosen at run time from _MM_GET_ROUNDING_MODE().
//
// One of three implementations is selected at compile time:
//   1. __ARM_FEATURE_FRINT: vrnd32xq_f32 followed by a plain convert.
//   2. AArch64 / __ARM_FEATURE_DIRECTED_ROUNDING: one convert intrinsic per
//      rounding mode.
//   3. Otherwise: a vectorised round-half-to-even emulation for
//      _MM_ROUND_NEAREST, and per-lane floorf / ceilf / C truncation for the
//      remaining modes.
FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a)
{
#if defined(__ARM_FEATURE_FRINT)
    // NOTE(review): this path never queries _MM_GET_ROUNDING_MODE();
    // presumably vrnd32xq_f32 already honours the active hardware rounding
    // mode -- confirm against the Arm intrinsic reference.
    const float32x4_t rounded = vrnd32xq_f32(a);
    return vreinterpretq_m128i_s32(vcvtq_s32_f32(rounded));
#elif (defined(__aarch64__) || defined(_M_ARM64)) || \
    defined(__ARM_FEATURE_DIRECTED_ROUNDING)
    const float32x4_t src = vreinterpretq_f32_m128(a);
    int32x4_t converted;

    // Pick the convert intrinsic that matches the requested rounding mode.
    switch (_MM_GET_ROUNDING_MODE()) {
    case _MM_ROUND_NEAREST:
        converted = vcvtnq_s32_f32(src);
        break;
    case _MM_ROUND_DOWN:
        converted = vcvtmq_s32_f32(src);
        break;
    case _MM_ROUND_UP:
        converted = vcvtpq_s32_f32(src);
        break;
    default:  // _MM_ROUND_TOWARD_ZERO
        converted = vcvtq_s32_f32(src);
        break;
    }
    return vreinterpretq_m128i_s32(converted);
#else
    // Scalar view of the four lanes, used by the per-lane fallbacks below.
    const float *lanes = (const float *) &a;

    switch (_MM_GET_ROUNDING_MODE()) {
    case _MM_ROUND_NEAREST: {
        const float32x4_t src = vreinterpretq_f32_m128(a);

        // +/-0.5 per lane: the sign bit comes from a, all other bits from
        // 0.5f.
        const uint32x4_t sign_bit = vdupq_n_u32(0x80000000);
        const float32x4_t signed_half =
            vbslq_f32(sign_bit, src, vdupq_n_f32(0.5f));

        // Candidate for non-tie lanes: truncate(a + (+/-0.5)).
        const int32x4_t shifted = vcvtq_s32_f32(vaddq_f32(src, signed_half));

        // Candidate for tie lanes: start from truncate(a), add 1 when that
        // value is positive (sign bit of its negation is set), then clear
        // bit 0 so the result is even.
        const int32x4_t truncated = vcvtq_s32_f32(src);
        const uint32x4_t negated_bits =
            vreinterpretq_u32_s32(vnegq_s32(truncated));
        const int32x4_t bump =
            vreinterpretq_s32_u32(vshrq_n_u32(negated_bits, 31)); /* 1 or 0 */
        const int32x4_t nearest_even =
            vbicq_s32(vaddq_s32(truncated, bump), vdupq_n_s32(1));

        // A lane is a tie when its fractional part a - truncate(a) equals
        // +/-0.5 exactly.
        const float32x4_t fraction = vsubq_f32(src, vcvtq_f32_s32(truncated));
        const uint32x4_t is_tie = vceqq_f32(fraction, signed_half);

        return vreinterpretq_m128i_s32(
            vbslq_s32(is_tie, nearest_even, shifted));
    }
    case _MM_ROUND_DOWN: {
        const int32_t e0 = (int32_t) floorf(lanes[0]);
        const int32_t e1 = (int32_t) floorf(lanes[1]);
        const int32_t e2 = (int32_t) floorf(lanes[2]);
        const int32_t e3 = (int32_t) floorf(lanes[3]);
        return _mm_set_epi32(e3, e2, e1, e0);
    }
    case _MM_ROUND_UP: {
        const int32_t e0 = (int32_t) ceilf(lanes[0]);
        const int32_t e1 = (int32_t) ceilf(lanes[1]);
        const int32_t e2 = (int32_t) ceilf(lanes[2]);
        const int32_t e3 = (int32_t) ceilf(lanes[3]);
        return _mm_set_epi32(e3, e2, e1, e0);
    }
    default: {  // _MM_ROUND_TOWARD_ZERO
        const int32_t e0 = (int32_t) lanes[0];
        const int32_t e1 = (int32_t) lanes[1];
        const int32_t e2 = (int32_t) lanes[2];
        const int32_t e3 = (int32_t) lanes[3];
        return _mm_set_epi32(e3, e2, e1, e0);
    }
    }
#endif
}