in native/src/seal/evaluator.cpp [453:594]
void Evaluator::ckks_multiply(Ciphertext &encrypted1, const Ciphertext &encrypted2, MemoryPoolHandle pool) const
{
if (!(encrypted1.is_ntt_form() && encrypted2.is_ntt_form()))
{
throw invalid_argument("encrypted1 or encrypted2 must be in NTT form");
}
// Extract encryption parameters.
auto &context_data = *context_.get_context_data(encrypted1.parms_id());
auto &parms = context_data.parms();
size_t coeff_count = parms.poly_modulus_degree();
size_t coeff_modulus_size = parms.coeff_modulus().size();
size_t encrypted1_size = encrypted1.size();
size_t encrypted2_size = encrypted2.size();
double new_scale = encrypted1.scale() * encrypted2.scale();
if (!is_scale_within_bounds(new_scale, context_data))
{
throw invalid_argument("scale out of bounds");
}
// Determine destination.size()
// Default is 3 (c_0, c_1, c_2)
size_t dest_size = sub_safe(add_safe(encrypted1_size, encrypted2_size), size_t(1));
// Size check
if (!product_fits_in(dest_size, coeff_count, coeff_modulus_size))
{
throw logic_error("invalid parameters");
}
// Set up iterator for the base
auto coeff_modulus = iter(parms.coeff_modulus());
// Prepare destination
encrypted1.resize(context_, context_data.parms_id(), dest_size);
// Set up iterators for input ciphertexts
PolyIter encrypted1_iter = iter(encrypted1);
ConstPolyIter encrypted2_iter = iter(encrypted2);
if (dest_size == 3)
{
// We want to keep six polynomials in the L1 cache: x[0], x[1], x[2], y[0], y[1], temp.
// For a 32KiB cache, which can store 32768 / 8 = 4096 coefficients, = 682.67 coefficients per polynomial,
// we should keep the tile size at 682 or below. The tile size must divide coeff_count, i.e. be a power of
// two. Some testing shows similar performance with tile size 256 and 512, and worse performance on smaller
// tiles. We pick the smaller of the two to prevent L1 cache misses on processors with < 32 KiB L1 cache.
size_t tile_size = min<size_t>(coeff_count, size_t(256));
size_t num_tiles = coeff_count / tile_size;
#ifdef SEAL_DEBUG
if (coeff_count % tile_size != 0)
{
throw invalid_argument("tile_size does not divide coeff_count");
}
#endif
// Semantic misuse of RNSIter; each is really pointing to the data for each RNS factor in sequence
ConstRNSIter encrypted2_0_iter(*encrypted2_iter[0], tile_size);
ConstRNSIter encrypted2_1_iter(*encrypted2_iter[1], tile_size);
RNSIter encrypted1_0_iter(*encrypted1_iter[0], tile_size);
RNSIter encrypted1_1_iter(*encrypted1_iter[1], tile_size);
RNSIter encrypted1_2_iter(*encrypted1_iter[2], tile_size);
// Temporary buffer to store intermediate results
SEAL_ALLOCATE_GET_COEFF_ITER(temp, tile_size, pool);
// Computes the output tile_size coefficients at a time
// Given input tuples of polynomials x = (x[0], x[1], x[2]), y = (y[0], y[1]), computes
// x = (x[0] * y[0], x[0] * y[1] + x[1] * y[0], x[1] * y[1])
// with appropriate modular reduction
SEAL_ITERATE(coeff_modulus, coeff_modulus_size, [&](auto I) {
SEAL_ITERATE(iter(size_t(0)), num_tiles, [&](SEAL_MAYBE_UNUSED auto J) {
// Compute third output polynomial, overwriting input
// x[2] = x[1] * y[1]
dyadic_product_coeffmod(
encrypted1_1_iter[0], encrypted2_1_iter[0], tile_size, I, encrypted1_2_iter[0]);
// Compute second output polynomial, overwriting input
// temp = x[1] * y[0]
dyadic_product_coeffmod(encrypted1_1_iter[0], encrypted2_0_iter[0], tile_size, I, temp);
// x[1] = x[0] * y[1]
dyadic_product_coeffmod(
encrypted1_0_iter[0], encrypted2_1_iter[0], tile_size, I, encrypted1_1_iter[0]);
// x[1] += temp
add_poly_coeffmod(encrypted1_1_iter[0], temp, tile_size, I, encrypted1_1_iter[0]);
// Compute first output polynomial, overwriting input
// x[0] = x[0] * y[0]
dyadic_product_coeffmod(
encrypted1_0_iter[0], encrypted2_0_iter[0], tile_size, I, encrypted1_0_iter[0]);
// Manually increment iterators
encrypted1_0_iter++;
encrypted1_1_iter++;
encrypted1_2_iter++;
encrypted2_0_iter++;
encrypted2_1_iter++;
});
});
}
else
{
// Allocate temporary space for the result
SEAL_ALLOCATE_ZERO_GET_POLY_ITER(temp, dest_size, coeff_count, coeff_modulus_size, pool);
SEAL_ITERATE(iter(size_t(0)), dest_size, [&](auto I) {
// We iterate over relevant components of encrypted1 and encrypted2 in increasing order for
// encrypted1 and reversed (decreasing) order for encrypted2. The bounds for the indices of
// the relevant terms are obtained as follows.
size_t curr_encrypted1_last = min<size_t>(I, encrypted1_size - 1);
size_t curr_encrypted2_first = min<size_t>(I, encrypted2_size - 1);
size_t curr_encrypted1_first = I - curr_encrypted2_first;
// size_t curr_encrypted2_last = secret_power_index - curr_encrypted1_last;
// The total number of dyadic products is now easy to compute
size_t steps = curr_encrypted1_last - curr_encrypted1_first + 1;
// Create a shifted iterator for the first input
auto shifted_encrypted1_iter = encrypted1_iter + curr_encrypted1_first;
// Create a shifted reverse iterator for the second input
auto shifted_reversed_encrypted2_iter = reverse_iter(encrypted2_iter + curr_encrypted2_first);
SEAL_ITERATE(iter(shifted_encrypted1_iter, shifted_reversed_encrypted2_iter), steps, [&](auto J) {
// Extra care needed here:
// temp_iter must be dereferenced once to produce an appropriate RNSIter
SEAL_ITERATE(iter(J, coeff_modulus, temp[I]), coeff_modulus_size, [&](auto K) {
SEAL_ALLOCATE_GET_COEFF_ITER(prod, coeff_count, pool);
dyadic_product_coeffmod(get<0, 0>(K), get<0, 1>(K), coeff_count, get<1>(K), prod);
add_poly_coeffmod(prod, get<2>(K), coeff_count, get<1>(K), get<2>(K));
});
});
});
// Set the final result
set_poly_array(temp, dest_size, coeff_count, coeff_modulus_size, encrypted1.data());
}
// Set the scale
encrypted1.scale() = new_scale;
}