in IsometricPatternMatcher/Image.h [391:400]
inline float hsum(__m256 v) {
__m128 v2 = _mm256_extractf128_ps(v, 1);
__m128 v1 = _mm256_castps256_ps128(v);
v1 = _mm_add_ps(v1, v2);
v2 = _mm_movehdup_ps(v1); // broadcast elements 3,1 to 2,0
v1 = _mm_add_ps(v1, v2);
v2 = _mm_movehl_ps(v2, v1); // high half -> low half
v1 = _mm_add_ss(v1, v2);
return _mm_cvtss_f32(v1);
}