in common/checksum/sse2neon.h [7619:7673]
// Round the packed single-precision (32-bit) floating-point elements in a
// using the rounding parameter, and return the results as packed
// single-precision floating-point elements.
//
// rounding is one of the _MM_FROUND_TO_* direction selectors or
// _MM_FROUND_CUR_DIRECTION, optionally OR-ed with _MM_FROUND_NO_EXC.
// _MM_FROUND_NO_EXC does not influence the rounded value, so it is stripped
// before dispatching: a selector picks the same direction whether or not the
// flag is present. Any value that is not one of the four explicit direction
// selectors is rounded according to the current rounding mode.
FORCE_INLINE __m128 _mm_round_ps(__m128 a, int rounding)
{
    /* Direction selector with the exception-suppression flag removed. */
    const int mode = rounding & ~_MM_FROUND_NO_EXC;
#if (defined(__aarch64__) || defined(_M_ARM64)) || \
    defined(__ARM_FEATURE_DIRECTED_ROUNDING)
    switch (mode) {
    case _MM_FROUND_TO_NEAREST_INT:
        return vreinterpretq_m128_f32(vrndnq_f32(vreinterpretq_f32_m128(a)));
    case _MM_FROUND_TO_NEG_INF:
        return _mm_floor_ps(a);
    case _MM_FROUND_TO_POS_INF:
        return _mm_ceil_ps(a);
    case _MM_FROUND_TO_ZERO:
        return vreinterpretq_m128_f32(vrndq_f32(vreinterpretq_f32_m128(a)));
    default:  //_MM_FROUND_CUR_DIRECTION
        return vreinterpretq_m128_f32(vrndiq_f32(vreinterpretq_f32_m128(a)));
    }
#else
    /* Without directed-rounding instructions the direction is resolved in
     * software. Everything that is not an explicit selector follows the
     * current rounding mode, mirroring the default case of the switch used
     * when directed rounding is available. */
    const int use_current =
        !(mode == _MM_FROUND_TO_NEAREST_INT || mode == _MM_FROUND_TO_NEG_INF ||
          mode == _MM_FROUND_TO_POS_INF || mode == _MM_FROUND_TO_ZERO);

    if (mode == _MM_FROUND_TO_NEAREST_INT ||
        (use_current && _MM_GET_ROUNDING_MODE() == _MM_ROUND_NEAREST)) {
        /* Round half to even, computed in int32 lanes.
         * NOTE(review): the result passes through int32 conversions, so
         * inputs outside the int32 range, NaN and infinities are presumably
         * not preserved on this path - confirm before relying on it. */
        uint32x4_t signmask = vdupq_n_u32(0x80000000);
        float32x4_t half = vbslq_f32(signmask, vreinterpretq_f32_m128(a),
                                     vdupq_n_f32(0.5f)); /* +/- 0.5 */
        int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32(
            vreinterpretq_f32_m128(a), half)); /* round to integer: [a + 0.5]*/
        int32x4_t r_trunc = vcvtq_s32_f32(
            vreinterpretq_f32_m128(a)); /* truncate to integer: [a] */
        int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32(
            vreinterpretq_u32_s32(vnegq_s32(r_trunc)), 31)); /* 1 or 0 */
        int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone),
                                     vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */
        float32x4_t delta = vsubq_f32(
            vreinterpretq_f32_m128(a),
            vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */
        uint32x4_t is_delta_half =
            vceqq_f32(delta, half); /* delta == +/- 0.5 */
        /* Exact ties take the even candidate, everything else [a +/- 0.5]. */
        return vreinterpretq_m128_f32(
            vcvtq_f32_s32(vbslq_s32(is_delta_half, r_even, r_normal)));
    } else if (mode == _MM_FROUND_TO_NEG_INF ||
               (use_current && _MM_GET_ROUNDING_MODE() == _MM_ROUND_DOWN)) {
        return _mm_floor_ps(a);
    } else if (mode == _MM_FROUND_TO_POS_INF ||
               (use_current && _MM_GET_ROUNDING_MODE() == _MM_ROUND_UP)) {
        return _mm_ceil_ps(a);
    }

    /* Remaining case: round toward zero (explicit _MM_FROUND_TO_ZERO, or the
     * current mode is none of nearest/down/up). Positive lanes are floored
     * and all other lanes are ceiled, which drops the fractional part. */
    float *v_float = (float *) &a;
    return _mm_set_ps(v_float[3] > 0 ? floorf(v_float[3]) : ceilf(v_float[3]),
                      v_float[2] > 0 ? floorf(v_float[2]) : ceilf(v_float[2]),
                      v_float[1] > 0 ? floorf(v_float[1]) : ceilf(v_float[1]),
                      v_float[0] > 0 ? floorf(v_float[0]) : ceilf(v_float[0]));
#endif
}