crypto/fipsmodule/ml_kem/mlkem/compress.c

/*
 * Copyright (c) 2024-2025 The mlkem-native project authors
 * SPDX-License-Identifier: Apache-2.0
 */

#include "common.h"

#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)

#include <stdint.h>
#include <string.h>
#include "cbmc.h"
#include "compress.h"
#include "debug.h"
#include "verify.h"

#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3)
#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D4)
/* Reference: `poly_compress()` in the reference implementation,
 *            for ML-KEM-{512,768}.
 * - In contrast to the reference implementation, we assume
 *   unsigned canonical coefficients here.
 *   The reference implementation works with coefficients
 *   in the range (-MLKEM_Q+1,...,MLKEM_Q-1).
 */
MLK_INTERNAL_API
void mlk_poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4],
                          const mlk_poly *a)
{
  unsigned i;
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);

  for (i = 0; i < MLKEM_N / 8; i++)
  __loop__(invariant(i <= MLKEM_N / 8))
  {
    unsigned j;
    uint8_t t[8] = {0};
    for (j = 0; j < 8; j++)
    __loop__(
      invariant(i <= MLKEM_N / 8 && j <= 8)
      invariant(array_bound(t, 0, j, 0, 16)))
    {
      t[j] = mlk_scalar_compress_d4(a->coeffs[8 * i + j]);
    }

    r[i * 4] = t[0] | (t[1] << 4);
    r[i * 4 + 1] = t[2] | (t[3] << 4);
    r[i * 4 + 2] = t[4] | (t[5] << 4);
    r[i * 4 + 3] = t[6] | (t[7] << 4);
  }
}
#else /* !MLK_USE_NATIVE_POLY_COMPRESS_D4 */
MLK_INTERNAL_API
void mlk_poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4],
                          const mlk_poly *a)
{
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
  mlk_poly_compress_d4_native(r, a->coeffs);
}
#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D4 */

#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D10)
/* Reference: Embedded into `polyvec_compress()` in the
 *            reference implementation, for ML-KEM-{512,768}.
 * - In contrast to the reference implementation, we assume
 *   unsigned canonical coefficients here.
 *   The reference implementation works with coefficients
 *   in the range (-MLKEM_Q+1,...,MLKEM_Q-1).
 */
MLK_INTERNAL_API
void mlk_poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10],
                           const mlk_poly *a)
{
  unsigned j;
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
  for (j = 0; j < MLKEM_N / 4; j++)
  __loop__(invariant(j <= MLKEM_N / 4))
  {
    unsigned k;
    uint16_t t[4];
    for (k = 0; k < 4; k++)
    __loop__(
      invariant(k <= 4)
      invariant(forall(r, 0, k, t[r] < (1u << 10))))
    {
      t[k] = mlk_scalar_compress_d10(a->coeffs[4 * j + k]);
    }

    /*
     * Make all implicit truncation explicit. No data is being
     * truncated for the LHS's since each t[i] is 10-bit in size.
     */
    r[5 * j + 0] = (t[0] >> 0) & 0xFF;
    r[5 * j + 1] = (t[0] >> 8) | ((t[1] << 2) & 0xFF);
    r[5 * j + 2] = (t[1] >> 6) | ((t[2] << 4) & 0xFF);
    r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF);
    r[5 * j + 4] = (t[3] >> 2);
  }
}
#else /* !MLK_USE_NATIVE_POLY_COMPRESS_D10 */
MLK_INTERNAL_API
void mlk_poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10],
                           const mlk_poly *a)
{
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
  mlk_poly_compress_d10_native(r, a->coeffs);
}
#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D10 */
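/*
 * Worked example (illustrative only; assumes mlk_scalar_compress_d*
 * computes round(2^d * x / MLKEM_Q) mod 2^d with round-half-up, as in
 * [FIPS 203, Eq (4.7)], with MLKEM_Q = 3329):
 *
 *   mlk_scalar_compress_d10(1000) = round(1024 * 1000 / 3329)
 *                                 = round(307.60) = 308
 *
 * The 5-byte packing of four 10-bit values t0..t3 above is:
 *
 *   r[0] bits 7..0 = t0 bits 7..0
 *   r[1] bits 1..0 = t0 bits 9..8, bits 7..2 = t1 bits 5..0
 *   r[2] bits 3..0 = t1 bits 9..6, bits 7..4 = t2 bits 3..0
 *   r[3] bits 5..0 = t2 bits 9..4, bits 7..6 = t3 bits 1..0
 *   r[4] bits 7..0 = t3 bits 9..2
 */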
#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D4)
/* Reference: `poly_decompress()` in the reference implementation,
 *            for ML-KEM-{512,768}.
 */
MLK_INTERNAL_API
void mlk_poly_decompress_d4(mlk_poly *r,
                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4])
{
  unsigned i;
  for (i = 0; i < MLKEM_N / 2; i++)
  __loop__(
    invariant(i <= MLKEM_N / 2)
    invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q)))
  {
    r->coeffs[2 * i + 0] = mlk_scalar_decompress_d4((a[i] >> 0) & 0xF);
    r->coeffs[2 * i + 1] = mlk_scalar_decompress_d4((a[i] >> 4) & 0xF);
  }

  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
}
#else /* !MLK_USE_NATIVE_POLY_DECOMPRESS_D4 */
MLK_INTERNAL_API
void mlk_poly_decompress_d4(mlk_poly *r,
                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4])
{
  mlk_poly_decompress_d4_native(r->coeffs, a);
  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
}
#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D4 */

#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D10)
/* Reference: Embedded into `polyvec_decompress()` in the
 *            reference implementation, for ML-KEM-{512,768}.
 */
MLK_INTERNAL_API
void mlk_poly_decompress_d10(mlk_poly *r,
                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10])
{
  unsigned j;
  for (j = 0; j < MLKEM_N / 4; j++)
  __loop__(
    invariant(j <= MLKEM_N / 4)
    invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q)))
  {
    unsigned k;
    uint16_t t[4];
    uint8_t const *base = &a[5 * j];
    t[0] = 0x3FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8));
    t[1] = 0x3FF & ((base[1] >> 2) | ((uint16_t)base[2] << 6));
    t[2] = 0x3FF & ((base[2] >> 4) | ((uint16_t)base[3] << 4));
    t[3] = 0x3FF & ((base[3] >> 6) | ((uint16_t)base[4] << 2));

    for (k = 0; k < 4; k++)
    __loop__(
      invariant(k <= 4)
      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q)))
    {
      r->coeffs[4 * j + k] = mlk_scalar_decompress_d10(t[k]);
    }
  }

  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
}
#else /* !MLK_USE_NATIVE_POLY_DECOMPRESS_D10 */
MLK_INTERNAL_API
void mlk_poly_decompress_d10(mlk_poly *r,
                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10])
{
  mlk_poly_decompress_d10_native(r->coeffs, a);
  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
}
#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D10 */

#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == 3 */
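/*
 * Worked example (illustrative only; assumes mlk_scalar_decompress_d*
 * computes round(MLKEM_Q * y / 2^d) with round-half-up, as in
 * [FIPS 203, Eq (4.8)]): a d = 10 round trip is
 *
 *   compress_d10(1000)  = round(1024 * 1000 / 3329) = 308
 *   decompress_d10(308) = round(3329 * 308 / 1024)  = 1001
 *
 * The error |1001 - 1000| = 1 is within the generic bound
 * round(MLKEM_Q / 2^(d+1)) = 2 for d = 10.
 */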
#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4
#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D5)
/* Reference: `poly_compress()` in the reference implementation,
 *            for ML-KEM-1024.
 * - In contrast to the reference implementation, we assume
 *   unsigned canonical coefficients here.
 *   The reference implementation works with coefficients
 *   in the range (-MLKEM_Q+1,...,MLKEM_Q-1).
 */
MLK_INTERNAL_API
void mlk_poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5],
                          const mlk_poly *a)
{
  unsigned i;
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);

  for (i = 0; i < MLKEM_N / 8; i++)
  __loop__(invariant(i <= MLKEM_N / 8))
  {
    unsigned j;
    uint8_t t[8] = {0};
    for (j = 0; j < 8; j++)
    __loop__(
      invariant(i <= MLKEM_N / 8 && j <= 8)
      invariant(array_bound(t, 0, j, 0, 32)))
    {
      t[j] = mlk_scalar_compress_d5(a->coeffs[8 * i + j]);
    }

    /*
     * Explicitly truncate to avoid warning about
     * implicit truncation in CBMC, and use array indexing into
     * r rather than pointer-arithmetic to simplify verification
     */
    r[i * 5] = 0xFF & ((t[0] >> 0) | (t[1] << 5));
    r[i * 5 + 1] = 0xFF & ((t[1] >> 3) | (t[2] << 2) | (t[3] << 7));
    r[i * 5 + 2] = 0xFF & ((t[3] >> 1) | (t[4] << 4));
    r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6));
    r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3));
  }
}
#else /* !MLK_USE_NATIVE_POLY_COMPRESS_D5 */
MLK_INTERNAL_API
void mlk_poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5],
                          const mlk_poly *a)
{
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
  mlk_poly_compress_d5_native(r, a->coeffs);
}
#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D5 */

#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D11)
/* Reference: Embedded into `polyvec_compress()` in the
 *            reference implementation, for ML-KEM-1024.
 * - In contrast to the reference implementation, we assume
 *   unsigned canonical coefficients here.
 *   The reference implementation works with coefficients
 *   in the range (-MLKEM_Q+1,...,MLKEM_Q-1).
 */
MLK_INTERNAL_API
void mlk_poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11],
                           const mlk_poly *a)
{
  unsigned j;
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);

  for (j = 0; j < MLKEM_N / 8; j++)
  __loop__(invariant(j <= MLKEM_N / 8))
  {
    unsigned k;
    uint16_t t[8];
    for (k = 0; k < 8; k++)
    __loop__(
      invariant(k <= 8)
      invariant(forall(r, 0, k, t[r] < (1u << 11))))
    {
      t[k] = mlk_scalar_compress_d11(a->coeffs[8 * j + k]);
    }

    /*
     * Make all implicit truncation explicit. No data is being
     * truncated for the LHS's since each t[i] is 11-bit in size.
     */
    r[11 * j + 0] = (t[0] >> 0) & 0xFF;
    r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF);
    r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF);
    r[11 * j + 3] = (t[2] >> 2) & 0xFF;
    r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF);
    r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF);
    r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF);
    r[11 * j + 7] = (t[5] >> 1) & 0xFF;
    r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF);
    r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF);
    r[11 * j + 10] = (t[7] >> 3);
  }
}
#else /* !MLK_USE_NATIVE_POLY_COMPRESS_D11 */
MLK_INTERNAL_API
void mlk_poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11],
                           const mlk_poly *a)
{
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
  mlk_poly_compress_d11_native(r, a->coeffs);
}
#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D11 */
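/*
 * Worked example (illustrative only; same rounding assumptions as
 * above): mlk_scalar_compress_d11(1000) = round(2048 * 1000 / 3329)
 * = round(615.20) = 615. The 11-byte packing of eight 11-bit values
 * t0..t7 above is (8 * 11 = 88 bits = 11 bytes):
 *
 *   r[0]  = t0 bits  7..0
 *   r[1]  = t0 bits 10..8 | t1 bits 4..0 << 3
 *   r[2]  = t1 bits 10..5 | t2 bits 1..0 << 6
 *   r[3]  = t2 bits  9..2
 *   r[4]  = t2 bit  10    | t3 bits 6..0 << 1
 *   r[5]  = t3 bits 10..7 | t4 bits 3..0 << 4
 *   r[6]  = t4 bits 10..4 | t5 bit  0    << 7
 *   r[7]  = t5 bits  8..1
 *   r[8]  = t5 bits 10..9 | t6 bits 5..0 << 2
 *   r[9]  = t6 bits 10..6 | t7 bits 2..0 << 5
 *   r[10] = t7 bits 10..3
 */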
#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D5)
/* Reference: `poly_decompress()` in the reference implementation,
 *            for ML-KEM-1024.
 */
MLK_INTERNAL_API
void mlk_poly_decompress_d5(mlk_poly *r,
                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5])
{
  unsigned i;
  for (i = 0; i < MLKEM_N / 8; i++)
  __loop__(
    invariant(i <= MLKEM_N / 8)
    invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q)))
  {
    unsigned j;
    uint8_t t[8];
    const unsigned offset = i * 5;

    /*
     * Explicitly truncate to avoid warning about
     * implicit truncation in CBMC and unwind loop for ease
     * of proof.
     */

    /*
     * Decompress 5 8-bit bytes (so 40 bits) into
     * 8 5-bit values stored in t[]
     */
    t[0] = 0x1F & (a[offset + 0] >> 0);
    t[1] = 0x1F & ((a[offset + 0] >> 5) | (a[offset + 1] << 3));
    t[2] = 0x1F & (a[offset + 1] >> 2);
    t[3] = 0x1F & ((a[offset + 1] >> 7) | (a[offset + 2] << 1));
    t[4] = 0x1F & ((a[offset + 2] >> 4) | (a[offset + 3] << 4));
    t[5] = 0x1F & (a[offset + 3] >> 1);
    t[6] = 0x1F & ((a[offset + 3] >> 6) | (a[offset + 4] << 2));
    t[7] = 0x1F & (a[offset + 4] >> 3);

    /* and copy to the correct slice in r[] */
    for (j = 0; j < 8; j++)
    __loop__(
      invariant(j <= 8 && i <= MLKEM_N / 8)
      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q)))
    {
      r->coeffs[8 * i + j] = mlk_scalar_decompress_d5(t[j]);
    }
  }

  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
}
#else /* !MLK_USE_NATIVE_POLY_DECOMPRESS_D5 */
MLK_INTERNAL_API
void mlk_poly_decompress_d5(mlk_poly *r,
                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5])
{
  mlk_poly_decompress_d5_native(r->coeffs, a);
  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
}
#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D5 */

#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D11)
/* Reference: Embedded into `polyvec_decompress()` in the
 *            reference implementation, for ML-KEM-1024.
 */
MLK_INTERNAL_API
void mlk_poly_decompress_d11(mlk_poly *r,
                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11])
{
  unsigned j;
  for (j = 0; j < MLKEM_N / 8; j++)
  __loop__(
    invariant(j <= MLKEM_N / 8)
    invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q)))
  {
    unsigned k;
    uint16_t t[8];
    uint8_t const *base = &a[11 * j];

    t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8));
    t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5));
    t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) |
                    ((uint16_t)base[4] << 10));
    t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7));
    t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4));
    t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) |
                    ((uint16_t)base[8] << 9));
    t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6));
    t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3));

    for (k = 0; k < 8; k++)
    __loop__(
      invariant(k <= 8)
      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q)))
    {
      r->coeffs[8 * j + k] = mlk_scalar_decompress_d11(t[k]);
    }
  }

  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
}
#else /* !MLK_USE_NATIVE_POLY_DECOMPRESS_D11 */
MLK_INTERNAL_API
void mlk_poly_decompress_d11(mlk_poly *r,
                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11])
{
  mlk_poly_decompress_d11_native(r->coeffs, a);
  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
}
#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D11 */

#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */
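/*
 * Note (illustrative, not from the reference source): in contrast to
 * the lossy Compress_d/Decompress_d pairs above, the serialization
 * below (ByteEncode_12/ByteDecode_12 in FIPS 203 terminology) is
 * lossless: every coefficient keeps its full 12 bits. Two
 * coefficients t0, t1 < 2^12 occupy exactly 3 bytes:
 *
 *   r[0] = t0 bits  7..0
 *   r[1] = t0 bits 11..8 (low nibble) | t1 bits 3..0 (high nibble)
 *   r[2] = t1 bits 11..4
 */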
#if !defined(MLK_USE_NATIVE_POLY_TOBYTES)
/* Reference: `poly_tobytes()` in the reference implementation.
 * - In contrast to the reference implementation, we assume
 *   unsigned canonical coefficients here.
 *   The reference implementation works with coefficients
 *   in the range (-MLKEM_Q+1,...,MLKEM_Q-1).
 */
MLK_INTERNAL_API
void mlk_poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const mlk_poly *a)
{
  unsigned i;
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);

  for (i = 0; i < MLKEM_N / 2; i++)
  __loop__(invariant(i <= MLKEM_N / 2))
  {
    const uint16_t t0 = a->coeffs[2 * i];
    const uint16_t t1 = a->coeffs[2 * i + 1];

    /*
     * t0 and t1 are both < MLKEM_Q, so contain at most 12 bits each of
     * significant data, so these can be packed into 24 bits or exactly
     * 3 bytes, as follows.
     */

    /* Least significant bits 0 - 7 of t0. */
    r[3 * i + 0] = t0 & 0xFF;

    /*
     * Most significant bits 8 - 11 of t0 become the least significant
     * nibble of the second byte. The least significant 4 bits
     * of t1 become the upper nibble of the second byte.
     */
    r[3 * i + 1] = (t0 >> 8) | ((t1 << 4) & 0xF0);

    /* Bits 4 - 11 of t1 become the third byte. */
    r[3 * i + 2] = t1 >> 4;
  }
}
#else /* !MLK_USE_NATIVE_POLY_TOBYTES */
MLK_INTERNAL_API
void mlk_poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const mlk_poly *a)
{
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
  mlk_poly_tobytes_native(r, a->coeffs);
}
#endif /* MLK_USE_NATIVE_POLY_TOBYTES */

#if !defined(MLK_USE_NATIVE_POLY_FROMBYTES)
/* Reference: `poly_frombytes()` in the reference implementation.
 */
MLK_INTERNAL_API
void mlk_poly_frombytes(mlk_poly *r, const uint8_t a[MLKEM_POLYBYTES])
{
  unsigned i;
  for (i = 0; i < MLKEM_N / 2; i++)
  __loop__(
    invariant(i <= MLKEM_N / 2)
    invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_UINT12_LIMIT)))
  {
    const uint8_t t0 = a[3 * i + 0];
    const uint8_t t1 = a[3 * i + 1];
    const uint8_t t2 = a[3 * i + 2];
    r->coeffs[2 * i + 0] = t0 | ((t1 << 8) & 0xFFF);
    r->coeffs[2 * i + 1] = (t1 >> 4) | (t2 << 4);
  }

  /* Note that the coefficients are not canonical */
  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_UINT12_LIMIT);
}
#else /* !MLK_USE_NATIVE_POLY_FROMBYTES */
MLK_INTERNAL_API
void mlk_poly_frombytes(mlk_poly *r, const uint8_t a[MLKEM_POLYBYTES])
{
  mlk_poly_frombytes_native(r->coeffs, a);
}
#endif /* MLK_USE_NATIVE_POLY_FROMBYTES */

/* Reference: `poly_frommsg()` in the reference implementation.
 * - We use a value barrier around the bit-selection mask to
 *   reduce the risk of compiler-introduced branches.
 *   The reference implementation contains the expression
 *   `(msg[i] >> j) & 1` which the compiler can reason must
 *   be either 0 or 1.
 */
MLK_INTERNAL_API
void mlk_poly_frommsg(mlk_poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
{
  unsigned i;
#if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
#error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
#endif

  for (i = 0; i < MLKEM_N / 8; i++)
  __loop__(
    invariant(i <= MLKEM_N / 8)
    invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q)))
  {
    unsigned j;
    for (j = 0; j < 8; j++)
    __loop__(
      invariant(i < MLKEM_N / 8 && j <= 8)
      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q)))
    {
      /* mlk_ct_sel_int16(MLKEM_Q_HALF, 0, b) is `Decompress_1(b != 0)`
       * as per [FIPS 203, Eq (4.8)]. */
      /* Prevent the compiler from recognizing this as a bit selection */
      uint8_t mask = mlk_value_barrier_u8(1u << j);
      r->coeffs[8 * i + j] = mlk_ct_sel_int16(MLKEM_Q_HALF, 0, msg[i] & mask);
    }
  }

  mlk_assert_abs_bound(r, MLKEM_N, MLKEM_Q);
}

/* Reference: `poly_tomsg()` in the reference implementation.
 * - In contrast to the reference implementation, we assume
 *   unsigned canonical coefficients here.
 *   The reference implementation works with coefficients
 *   in the range (-MLKEM_Q+1,...,MLKEM_Q-1).
 */
MLK_INTERNAL_API
void mlk_poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const mlk_poly *a)
{
  unsigned i;
  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);

  for (i = 0; i < MLKEM_N / 8; i++)
  __loop__(invariant(i <= MLKEM_N / 8))
  {
    unsigned j;
    msg[i] = 0;
    for (j = 0; j < 8; j++)
    __loop__(invariant(i <= MLKEM_N / 8 && j <= 8))
    {
      uint32_t t = mlk_scalar_compress_d1(a->coeffs[8 * i + j]);
      msg[i] |= t << j;
    }
  }
}

#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */

MLK_EMPTY_CU(compress)

#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */
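/*
 * Note (illustrative, not from the reference source):
 * mlk_poly_tomsg() and mlk_poly_frommsg() are the d = 1 instances of
 * compression and decompression. Under the rounding assumptions above:
 *
 *   compress_d1(x)   = round(2 * x / 3329) mod 2,
 *                      which is 1 exactly for 833 <= x <= 2496,
 *                      i.e. when x is closer to Q/2 than to 0 or Q;
 *   decompress_d1(1) = round(3329 / 2) = 1665 = MLKEM_Q_HALF.
 *
 * Each coefficient therefore carries one message bit, and decryption
 * noise of absolute value below roughly Q/4 cannot flip it.
 */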