INT32 floatToIDotF()

in d3d/archive/images/d3d11/tessellator.cpp [101:289]


INT32 floatToIDotF( const float& input )
{
    // ------------------------------------------------------------------------
    // Converts a 32-bit float to a 32-bit fixed point value by operating
    // directly on the float's bit pattern (integer and bitwise ops only).
    //
    // Result layout, MSB(31) ... LSB(0):
    //
    //      [sign-extend] i . f
    //
    //      i               integer part, 2's complement, c_uIBits wide
    //      .               implied binary point
    //      f               fractional part, unsigned, c_uFBits wide
    //      [sign-extend]   MSB of i replicated into the unused high bits
    //
    // Behaviour by input class:
    //      NaN                          -> 0
    //      at or above the upper bound  -> most positive fixed point value
    //      at or below the lower bound  -> most negative fixed point value
    //      magnitude too small          -> 0
    //      anything else                -> rounded to nearest, ties to even
    //
    // NOTE(review): c_uIBits, c_uFBits and c_bSigned are not declared in this
    // block; presumably they are template parameters of the enclosing
    // declaration - confirm against the full file.
    // ------------------------------------------------------------------------

    // The compile-time validation is left disabled to minimise #includes:
    // C_ASSERT( 2 <= c_uIBits && c_uIBits <= 32 && c_uFBits <= 32 && c_uIBits + c_uFBits <= 32 );

    // Clamp targets: the most negative and the most positive fixed point results.
    const INT32 clampLow  = (c_bSigned ? INT32( -1 ) << (c_uIBits + c_uFBits - 1) : 0);
    const INT32 clampHigh = ~clampLow;

    // ------------------------------------------------------------------------
    // Single precision float layout: 1 sign bit, 8 exponent bits, 23 mantissa bits
    // ------------------------------------------------------------------------
    const UINT8 fltMantissaBits = 23;
    const UINT8 fltExponentBits = 8;
    const INT32 fltExponentBias = (INT32( 1 ) << (fltExponentBits - 1)) - 1;
    const INT32 fltHiddenBit    = INT32( 1 ) << fltMantissaBits;
    const INT32 fltMantissaMask = fltHiddenBit - 1;
    const INT32 fltExponentMask = ((INT32( 1 ) << fltExponentBits) - 1) << fltMantissaBits;
    const INT32 fltSignBit      = INT32( 1 ) << (fltExponentBits + fltMantissaBits);

    // ------------------------------------------------------------------------
    // Clamp bounds, expressed as float bit patterns with the sign bit cleared
    // so that they can be tested with plain integer compares.
    // ------------------------------------------------------------------------
    INT32 posLimitBits;     // smallest positive pattern that clamps to clampHigh
    INT32 negLimitBits;     // smallest negative magnitude that clamps to clampLow
    INT32 lsbShift;         // mantissa bit position worth one fixed point LSB

    if (c_bSigned)
    {
        // Most negative value is -2^(i-1); its magnitude 2^(i-1) is also the
        // starting point for the positive bound 2^(i-1) - 2^(-f).
        negLimitBits = (fltExponentBias + c_uIBits - 1) << fltMantissaBits;
        posLimitBits = negLimitBits;
        lsbShift     = fltMantissaBits + 2 - c_uIBits - c_uFBits;
    }
    else
    {
        // Unsigned format: the range is 0 ... 2^(i) - 2^(-f). A lower bound of
        // 0 makes every input with the sign bit set clamp to clampLow.
        negLimitBits = 0;
        posLimitBits = (fltExponentBias + c_uIBits) << fltMantissaBits;
        lsbShift     = fltMantissaBits + 1 - c_uIBits - c_uFBits;
    }

    // Step the positive bound down from the power of two by one fixed point
    // LSB. When the fixed point format is finer than the float mantissa
    // (negative shift) the bound stays at the power of two.
    if (lsbShift >= 0)
    {
//        assert( lsbShift < 32 );
#pragma warning( suppress : 4293 )
        posLimitBits -= INT32( 1 ) << lsbShift;
    }

    // ------------------------------------------------------------------------
    // Take the input apart
    // ------------------------------------------------------------------------
    const INT32 rawBits          = *reinterpret_cast<const INT32*>( &input );
    const INT32 unbiasedExponent = ((rawBits & fltExponentMask) >> fltMantissaBits) - fltExponentBias;
    const bool  negative         = (rawBits & fltSignBit) != 0;
    const INT32 magnitudeBits    = rawBits & ~fltSignBit;

    // NaN (all-ones exponent with a non-zero mantissa) converts to 0.
    if (unbiasedExponent == (fltExponentBias + 1) && (rawBits & fltMantissaMask) != 0)
    {
        return 0;
    }

    // Out of range: clamp. The tests are integer compares on the bit patterns.
    if (negative)
    {
        if (magnitudeBits >= negLimitBits)
        {
            return clampLow;
        }
    }
    else if (rawBits >= posLimitBits)
    {
        return clampHigh;
    }

    // Magnitude too small to reach even half of one fixed point LSB: flush to 0.
    if (unbiasedExponent < -c_uFBits - 1)
    {
        return 0;
    }

    // ------------------------------------------------------------------------
    // In range: start from the mantissa with its hidden leading one restored,
    // then move the binary point to where the fixed point format expects it.
    // ------------------------------------------------------------------------
    INT32 fixed = (rawBits & fltMantissaMask) | fltHiddenBit;

    // Number of low mantissa bits that do not fit in the result.
    const INT32 extraBits = fltMantissaBits - c_uFBits - unbiasedExponent;

    if (extraBits < 0)
    {
        // The result has more fractional precision than the mantissa carries:
        // shift left, nothing to round.
        // NOTE(review): the original author argued this path is never taken
        // when c_uIBits + c_uFBits <= mantissa bits + 1; the only validation
        // visible here is the disabled C_ASSERT above, which allows up to 32.
        fixed <<= -extraBits;

        // 2's complement if negative
        if (negative)
        {
            fixed = ~fixed + 1;
        }
        return fixed;
    }

    // 2's complement if negative. Negating before rounding lets the same
    // rounding step and sign-extending shift serve both signs.
    if (negative)
    {
        fixed = ~fixed + 1;
    }

    const INT32 keptLsb     = 1 << extraBits;   // lowest bit that survives the shift
    const INT32 discardMask = keptLsb - 1;      // bits that get shifted off
    const INT32 halfLsb     = keptLsb >> 1;     // rounding bias

    // Round to nearest, ties to even: add the bias when the kept LSB is odd
    // (so an exact tie carries up to even) or when the discarded bits are
    // strictly above one half.
    const bool keptLsbIsOdd = (fixed & keptLsb) != 0;
    const bool aboveHalf    = (fixed & discardMask) > halfLsb;
    if (keptLsbIsOdd || aboveHalf)
    {
        fixed += halfLsb;
    }

    // Drop the discarded bits (sign extending).
    return fixed >> extraBits;
}