inline float hsum()

in IsometricPatternMatcher/Image.h [391:400]


/**
 * Horizontal sum of a 256-bit vector of single-precision floats.
 *
 * Adds the eight lanes of `v` and returns the total as a scalar. The
 * reduction narrows 8 -> 4 -> 2 -> 1 lanes, so values are combined pairwise
 * rather than strictly left to right.
 *
 * @param v  vector whose eight float lanes are summed
 * @return   sum of all eight lanes
 */
inline float hsum(__m256 v) {
  // 8 -> 4 lanes: fold the upper 128-bit half onto the lower half.
  const __m128 lo = _mm256_castps256_ps128(v);
  const __m128 hi = _mm256_extractf128_ps(v, 1);
  const __m128 quad = _mm_add_ps(lo, hi);

  // 4 -> 2 lanes: copy the odd lanes (3,1) into the even slots (2,0) so the
  // add leaves q0+q1 in lane 0 and q2+q3 in lane 2.
  const __m128 odd = _mm_movehdup_ps(quad);
  const __m128 pairs = _mm_add_ps(quad, odd);

  // 2 -> 1 lane: bring the upper half of `pairs` down to the lower half, then
  // add lane 0 only; the remaining lanes are never read.
  const __m128 top = _mm_movehl_ps(odd, pairs);
  return _mm_cvtss_f32(_mm_add_ss(pairs, top));
}