static int s2n_rand_get_entropy_from_rdrand()

in utils/s2n_random.c [690:795]


/*
 * Fills `data` with `size` bytes produced by the x86 RDRAND instruction.
 *
 * The destination is wrapped in a blob + stuffer and filled in chunks of at most
 * sizeof(output) (8) bytes. Each chunk comes from one 64-bit RDRAND on x86_64, or from
 * two 32-bit RDRANDs on i386. A chunk gets up to 10 attempts; an attempt is rejected
 * when the carry flag was not set or when the value is all-0 or all-1 bits.
 *
 * data: destination buffer, written through the stuffer.
 * size: number of bytes to generate.
 *
 * Returns S2N_SUCCESS once the stuffer has no space remaining. If all 10 attempts for a
 * chunk are rejected, the POSIX_ENSURE check fails with S2N_ERR_RDRAND_FAILED. When not
 * compiled for x86_64 or i386 the function only bails with S2N_ERR_UNSUPPORTED_CPU.
 *
 * NOTE(review): the POSIX_GUARD / POSIX_ENSURE / POSIX_BAIL macros are defined elsewhere;
 * presumably they return a failure code immediately -- confirm.
 * NOTE(review): no CPUID check for RDRAND support is visible in this function;
 * presumably the caller guarantees the instruction exists -- confirm.
 */
static int s2n_rand_get_entropy_from_rdrand(void *data, uint32_t size)
{
#if defined(__x86_64__) || defined(__i386__)
    struct s2n_blob out = { 0 };
    POSIX_GUARD(s2n_blob_init(&out, data, size));
    size_t space_remaining = 0;
    struct s2n_stuffer stuffer = { 0 };
    /* Scratch value for a single chunk. The union lets the asm write it as an integer
     * (u64, or two 32-bit halves on i386) while the stuffer reads it as raw bytes (u8). */
    union {
        uint64_t u64;
    #if defined(__i386__)
        struct {
            /* This code is only compiled for x86, which is little endian, so u_low overlays
             * the low-order half of u64 and u_high the high-order half. */
            uint32_t u_low;
            uint32_t u_high;
        } i386_fields;
    #endif /* defined(__i386__) */
        uint8_t u8[8];
    } output;

    POSIX_GUARD(s2n_stuffer_init(&stuffer, &out));
    /* Loop until the stuffer is full; space_remaining is re-read on every iteration and
     * reused below to size the (possibly short) final chunk. */
    while ((space_remaining = s2n_stuffer_space_remaining(&stuffer))) {
        unsigned char success = 0;
        output.u64 = 0;

        /* Give the hardware up to 10 attempts to produce an acceptable value for this chunk. */
        for (int tries = 0; tries < 10; tries++) {
    #if defined(__i386__)
            /* Execute the rdrand instruction twice. Each execution stores a 32-bit result in eax
            * (the "=&a" operand, assigned first to output.i386_fields.u_low and then to
            * output.i386_fields.u_high), and setc copies the carry flag, which rdrand sets on
            * success, into the matching success byte. The "cc" clobber tells the compiler that
            * the flags are modified.
            * Due to needing to support an ancient assembler we use the opcode syntax.
            * The %b1 modifier forces the byte-sized register name for operand 1
            * (e.g. cl instead of ecx).
            * Here's a description of how the opcode is encoded:
            * 0x0fc7 (rdrand)
            * 0xf0 (store the result in eax).
            */
            unsigned char success_high = 0, success_low = 0;
            __asm__ __volatile__(
                    ".byte 0x0f, 0xc7, 0xf0;\n"
                    "setc %b1;\n"
                    : "=&a"(output.i386_fields.u_low), "=qm"(success_low)
                    :
                    : "cc");

            __asm__ __volatile__(
                    ".byte 0x0f, 0xc7, 0xf0;\n"
                    "setc %b1;\n"
                    : "=&a"(output.i386_fields.u_high), "=qm"(success_high)
                    :
                    : "cc");
            /* Both halves must have reported success for the attempt to count. */
            /* cppcheck-suppress knownConditionTrueFalse */
            success = success_high & success_low;

            /* Treat either all 1 or all 0 bits in either the high or low order
             * bits as failure */
            if (output.i386_fields.u_low == 0 || output.i386_fields.u_low == UINT32_MAX
                    || output.i386_fields.u_high == 0 || output.i386_fields.u_high == UINT32_MAX) {
                success = 0;
            }
    #else
            /* Execute the rdrand instruction and store the result in rax (the "=&a" operand,
            * assigned to output.u64). setc copies the carry flag, which rdrand sets on success,
            * into `success`. The "cc" clobber tells the compiler that the flags are modified.
            * Due to needing to support an ancient assembler we use the opcode syntax.
            * The %b1 modifier forces the byte-sized register name for operand 1
            * (e.g. cl instead of ecx).
            * Here's a description of how the opcode is encoded:
            * 0x48 (pick a 64-bit register; the prefix does more, but that's all that matters here)
            * 0x0fc7 (rdrand)
            * 0xf0 (store the result in rax). */
            __asm__ __volatile__(
                    ".byte 0x48, 0x0f, 0xc7, 0xf0;\n"
                    "setc %b1;\n"
                    : "=&a"(output.u64), "=qm"(success)
                    :
                    : "cc");
    #endif /* defined(__i386__) */

            /* Some AMD CPUs will find that RDRAND "sticks" on all 1s but still reports success.
             * Some other very old CPUs use all 0s as an error condition while still reporting success.
             * If we encounter either of these suspicious values (a 1/2^63 chance) we'll treat them as
             * a failure and generate a new value.
             *
             * In the future we could add CPUID checks to detect processors with these known bugs,
             * however it does not appear worth it. The entropy loss is negligible and the
             * corresponding likelihood that a healthy CPU generates either of these values is also
             * negligible (1/2^63). Finally, adding processor specific logic would greatly
             * increase the complexity and would cause us to "miss" any unknown processors with
             * similar bugs.
             *
             * On i386 this check is already implied by the stricter per-half check above, so it
             * only has an effect on the x86_64 path. */
            if (output.u64 == UINT64_MAX || output.u64 == 0) {
                success = 0;
            }

            if (success) {
                break;
            }
        }

        /* `success` is still 0 here only if every one of the 10 attempts was rejected. */
        POSIX_ENSURE(success, S2N_ERR_RDRAND_FAILED);

        /* The final chunk may need fewer than 8 bytes; the unused random bytes are discarded. */
        size_t data_to_fill = MIN(sizeof(output), space_remaining);

        POSIX_GUARD(s2n_stuffer_write_bytes(&stuffer, output.u8, data_to_fill));
    }

    return S2N_SUCCESS;
#else
    /* Not an x86 build: RDRAND is unavailable, so always report an unsupported CPU. */
    POSIX_BAIL(S2N_ERR_UNSUPPORTED_CPU);
#endif
}