std::unique_ptr get_compressor_from_header()

in Release/src/http/common/http_compression.cpp [841:1001]


// Selects a compressor based on the value of a TE or Accept-Encoding header.
//
// encoding  - the raw header value: a comma-separated list of codings, each optionally
//             followed by ';'-separated parameters such as a "q=" ranking
// type      - which header the value came from; must be header_types::te or
//             header_types::accept_encoding
// factories - the factories to instantiate compressors from; if empty, the built-in
//             factories (g_compress_factories) are used instead
//
// Returns the compressor for the first unranked or "q=1" coding that can be instantiated.
// Failing that, the remaining codings with a non-zero rank are tried from highest to lowest
// rank. An empty pointer is returned if the header is empty, if no listed coding could be
// instantiated, or (Accept-Encoding only) if a ranked "identity" is reached before any
// coding that could be instantiated.
//
// Throws http_exception (BadRequest) if the list contains an empty field, or if a q-value
// contains a non-digit character or exceeds 1.
std::unique_ptr<compress_provider> get_compressor_from_header(
    const utility::string_t& encoding,
    header_types type,
    const std::vector<std::shared_ptr<compress_factory>>& factories)
{
    const std::vector<std::shared_ptr<compress_factory>>& f =
        factories.empty() ? web::http::compression::builtin::g_compress_factories : factories;
    std::unique_ptr<compress_provider> compressor;
    struct _tuple
    {
        size_t start;  // offset of the coding name within `encoding`
        size_t length; // length of the coding name, excluding parameters and whitespace
        size_t rank;   // q-value scaled to 0..1000, or size_t(-1) when no ranking was given
    } t;
    std::vector<_tuple> tokens; // ranked (1..999) codings, kept sorted by ascending rank
    size_t highest = 0;         // highest rank stored in `tokens` so far
    size_t n = 0;               // offset of the next list element, or npos once the list is exhausted
    bool first = true;

    // Converts one character of a q-value to its numeric value. Anything other than a decimal
    // digit is rejected here; otherwise the unsigned arithmetic below could wrap around and
    // turn a malformed ranking (e.g. "q=1./") into a seemingly valid one.
    const auto digit_value = [](utility::string_t::value_type ch) -> size_t {
        if (ch < _XPLATSTR('0') || ch > _XPLATSTR('9'))
        {
            throw http_exception(status_codes::BadRequest, "Invalid q-value in header");
        }
        return static_cast<size_t>(ch - _XPLATSTR('0'));
    };

    _ASSERTE(type == header_types::te || type == header_types::accept_encoding);

    // See https://tools.ietf.org/html/rfc7230#section-4.3 (TE) and
    // https://tools.ietf.org/html/rfc7231#section-5.3.4 (Accept-Encoding) for details

    while (n != utility::string_t::npos)
    {
        // Tokenize by commas first
        size_t mark = encoding.find(_XPLATSTR(','), n);
        t.start = n;
        t.rank = static_cast<size_t>(-1);
        if (mark == utility::string_t::npos)
        {
            t.length = encoding.size() - n;
            n = utility::string_t::npos;
        }
        else
        {
            t.length = mark - n;
            n = mark + 1;
        }

        // Then remove leading and trailing whitespace
        remove_surrounding_http_whitespace(encoding, t.start, t.length);

        // Next split at the semicolon, if any, and deal with rank and additional whitespace.
        // find() may return a semicolon belonging to a later list element (or npos); the
        // bounds check below restricts the match to the current element.
        mark = encoding.find(_XPLATSTR(';'), t.start);
        if (mark < t.start + t.length)
        {
            // Index of the last character of this list element, parameters included
            const size_t end = t.start + t.length - 1;
            t.length = mark - t.start;
            while (t.length > 0 && is_http_whitespace(encoding.at(t.start + t.length - 1)))
            {
                // Skip trailing whitespace in encoding type
                t.length--;
            }
            if (mark < end)
            {
                // Check for an optional ranking, max. length "q=0.999"
                mark = encoding.find(_XPLATSTR("q="), mark + 1);
                if (mark != utility::string_t::npos && mark + 1 < end && end - mark <= 6)
                {
                    // Determine ranking; leading whitespace has been implicitly skipped by find().
                    // The ranking always starts with '1' or '0' per standard, and has at most 3 decimal places
                    mark += 1; // now at '='
                    t.rank = 1000 * digit_value(encoding.at(mark + 1));
                    if (mark + 2 < end && encoding.at(mark + 2) == _XPLATSTR('.'))
                    {
                        // This is a real number rank; convert decimal part to hundreds and apply it
                        size_t factor = 100;
                        mark += 2; // now at '.'
                        for (size_t i = mark + 1; i <= end; ++i)
                        {
                            t.rank += digit_value(encoding.at(i)) * factor;
                            factor /= 10;
                        }
                    }
                    if (t.rank > 1000)
                    {
                        throw http_exception(status_codes::BadRequest, "Invalid q-value in header");
                    }
                }
            }
        }

        if (!t.length)
        {
            if (!first || n != utility::string_t::npos)
            {
                // An entirely empty header is OK per RFC, but an extraneous comma is not
                throw http_exception(status_codes::BadRequest, "Empty field in header");
            }
            return std::unique_ptr<compress_provider>();
        }

        if (!compressor)
        {
            if (t.rank == static_cast<size_t>(1000) || t.rank == static_cast<size_t>(-1))
            {
                // Immediately try to instantiate a compressor for any unranked or top-ranked algorithm
                compressor = web::http::compression::builtin::_make_compressor(f, encoding.substr(t.start, t.length));
            }
            else if (t.rank)
            {
                // Store off remaining ranked algorithms, sorting as we go
                if (t.rank >= highest)
                {
                    tokens.emplace_back(t);
                    highest = t.rank;
                }
                else
                {
                    // t.rank is below the current maximum, so an insertion point always exists
                    for (auto x = tokens.begin(); x != tokens.end(); ++x)
                    {
                        if (t.rank <= x->rank)
                        {
                            tokens.emplace(x, t);
                            break;
                        }
                    }
                }
            }
            // else a rank of 0 means "not permitted"
        }
        // else we've chosen a compressor; we're just validating the rest of the header

        first = false;
    }
    // Note: for Accept-Encoding, we don't currently explicitly handle "identity;q=0" and "*;q=0"

    if (compressor)
    {
        return compressor;
    }

    // If we're here, we didn't match the caller's compressor above;
    // try any that we saved off in order of highest to lowest rank
    for (auto it = tokens.rbegin(); it != tokens.rend(); ++it)
    {
        const auto coding = encoding.substr(it->start, it->length);

        // N.B for TE, "trailers" will simply fail to instantiate a
        // compressor; ditto for "*" and "identity" for Accept-Encoding
        auto candidate = web::http::compression::builtin::_make_compressor(f, coding);
        if (candidate)
        {
            return candidate;
        }
        if (type == header_types::accept_encoding && utility::details::str_iequal(coding, _XPLATSTR("identity")))
        {
            // The client specified a preference for "no encoding" vs. anything else we might still have
            return std::unique_ptr<compress_provider>();
        }
    }

    return std::unique_ptr<compress_provider>();
}