in native/src/seal/util/dwthandler.h [202:356]
void transform_from_rev(
    ValueType *values, int log_n, const RootType *roots, const ScalarType *scalar = nullptr) const
{
    // In-place staged butterfly transform over the 2^log_n entries of `values`.
    //
    // The work is split into log_n stages. A stage with `m` groups uses a pair distance of
    // `gap`, where gap * m == n / 2 throughout (gap doubles while m halves). Each group
    // consumes exactly one root, read sequentially starting at roots[1]; roots[0] is never
    // read. If `scalar` is non-null, the final stage additionally multiplies its outputs by
    // *scalar (see below).
    //
    // The final stage always touches values[gap], so with log_n == 0 it would access
    // values[1]; this function does not check for that.
    // NOTE(review): the name suggests the data/roots are expected in bit-reversed order --
    // confirm against the class documentation.
    const std::size_t n = std::size_t(1) << log_n;

    // Root shared by every butterfly of the group currently being processed. It is kept as
    // a mutable local so it is handed to arithmetic_ as a plain RootType lvalue.
    RootType twiddle;

    // Plain butterfly on one pair: (a, b) -> (guard(a + b), mul_root(a - b, twiddle)).
    const auto butterfly = [this, &twiddle](ValueType &lo, ValueType &hi) {
        ValueType a = lo;
        ValueType b = hi;
        lo = arithmetic_.guard(arithmetic_.add(a, b));
        hi = arithmetic_.mul_root(arithmetic_.sub(a, b), twiddle);
    };

    // Applies the plain butterfly to the `count` pairs (lo[k], hi[k]). Counts of four or
    // more are handled four pairs per iteration (manual unrolling).
    const auto plain_pass = [&butterfly](ValueType *lo, ValueType *hi, std::size_t count) {
        if (count < 4)
        {
            for (std::size_t k = 0; k < count; ++k)
            {
                butterfly(lo[k], hi[k]);
            }
            return;
        }
        for (std::size_t k = 0; k < count; k += 4)
        {
            butterfly(lo[k], hi[k]);
            butterfly(lo[k + 1], hi[k + 1]);
            butterfly(lo[k + 2], hi[k + 2]);
            butterfly(lo[k + 3], hi[k + 3]);
        }
    };

    // Every root read is a pre-increment, so the first root used is roots[1].
    const RootType *root_cursor = roots;

    // All stages except the last one (m == n/2 down to m == 2).
    std::size_t gap = 1;
    for (std::size_t m = n >> 1; m > 1; m >>= 1, gap <<= 1)
    {
        // Each group spans 2 * gap consecutive elements: lower half, then upper half.
        ValueType *group = values;
        for (std::size_t i = 0; i < m; ++i, group += gap << 1)
        {
            twiddle = *++root_cursor;
            plain_pass(group, group + gap, gap);
        }
    }

    // Last stage: a single group covering the whole array, with one more root.
    twiddle = *++root_cursor;
    ValueType *lower = values;
    ValueType *upper = values + gap;

    if (scalar == nullptr)
    {
        // No scaling requested: the last stage is an ordinary pass.
        plain_pass(lower, upper, gap);
        return;
    }

    // Scaling requested: the root is combined with the scalar once up front, the sum output
    // is multiplied by *scalar, and the lower input is passed through guard before use.
    RootType scaled_twiddle = arithmetic_.mul_root_scalar(twiddle, *scalar);
    const auto scaled_butterfly = [this, scalar, &scaled_twiddle](ValueType &lo, ValueType &hi) {
        ValueType a = arithmetic_.guard(lo);
        ValueType b = hi;
        lo = arithmetic_.mul_scalar(arithmetic_.guard(arithmetic_.add(a, b)), *scalar);
        hi = arithmetic_.mul_root(arithmetic_.sub(a, b), scaled_twiddle);
    };

    if (gap < 4)
    {
        for (std::size_t k = 0; k < gap; ++k)
        {
            scaled_butterfly(lower[k], upper[k]);
        }
    }
    else
    {
        // Same four-pairs-per-iteration unrolling as the plain pass.
        for (std::size_t k = 0; k < gap; k += 4)
        {
            scaled_butterfly(lower[k], upper[k]);
            scaled_butterfly(lower[k + 1], upper[k + 1]);
            scaled_butterfly(lower[k + 2], upper[k + 2]);
            scaled_butterfly(lower[k + 3], upper[k + 3]);
        }
    }
}