in include/Algo-Direct2.h [104:137]
void resolve(const FVec<SSE, double>& vz, const IVec<SSE, float>& bidx, uint32 *pr) const
{
const uint32* buckets = reinterpret_cast<const uint32 *>(base_t::data.buckets);
const double *xi = base_t::data.xi;
uint32 b1 = buckets[bidx.get1()];
uint32 b0 = buckets[bidx.get0()];
const double *p1 = &xi[b1];
const double *p0 = &xi[b0];
// read pairs ( X(t-1), X(t) )
__m128d vx1 = _mm_loadu_pd(p1);
__m128d vx0 = _mm_loadu_pd(p0);
// build:
// { X(t(0)-1), X(t(1)-1) }
// { X(t(0)), X(t(1)) }
__m128d vxm = _mm_shuffle_pd(vx0, vx1, 0);
__m128d vxp = _mm_shuffle_pd(vx0, vx1, 3);
IVec<SSE, double> i(b1, b0);
IVec<SSE, double> vlem = (vz < vxm);
IVec<SSE, double> vlep = (vz < vxp);
i = i + vlem + vlep;
union {
__m128i vec;
uint32 ui32[4];
} u;
u.vec = i;
pr[0] = u.ui32[0];
pr[1] = u.ui32[2];
}