in subprojects/speex/resample_neon.h [142:200]
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
float ret;
uint32_t remainder = len % 16;
len = len - remainder;
asm volatile (" cmp %[len], #0\n"
" bne 1f\n"
" vld1.32 {q4}, [%[b]]!\n"
" vld1.32 {q8}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmul.f32 q0, q4, q8\n"
" bne 4f\n"
" b 5f\n"
"1:"
" vld1.32 {q4, q5}, [%[b]]!\n"
" vld1.32 {q8, q9}, [%[a]]!\n"
" vld1.32 {q6, q7}, [%[b]]!\n"
" vld1.32 {q10, q11}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmul.f32 q0, q4, q8\n"
" vmul.f32 q1, q5, q9\n"
" vmul.f32 q2, q6, q10\n"
" vmul.f32 q3, q7, q11\n"
" beq 3f\n"
"2:"
" vld1.32 {q4, q5}, [%[b]]!\n"
" vld1.32 {q8, q9}, [%[a]]!\n"
" vld1.32 {q6, q7}, [%[b]]!\n"
" vld1.32 {q10, q11}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmla.f32 q0, q4, q8\n"
" vmla.f32 q1, q5, q9\n"
" vmla.f32 q2, q6, q10\n"
" vmla.f32 q3, q7, q11\n"
" bne 2b\n"
"3:"
" vadd.f32 q4, q0, q1\n"
" vadd.f32 q5, q2, q3\n"
" cmp %[remainder], #0\n"
" vadd.f32 q0, q4, q5\n"
" beq 5f\n"
"4:"
" vld1.32 {q6}, [%[b]]!\n"
" vld1.32 {q10}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmla.f32 q0, q6, q10\n"
" bne 4b\n"
"5:"
" vadd.f32 d0, d0, d1\n"
" vpadd.f32 d0, d0, d0\n"
" vmov.f32 %[ret], d0[0]\n"
: [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b),
[len] "+l" (len), [remainder] "+l" (remainder)
:
: "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
"q9", "q10", "q11");
return ret;
}