apr_status_t serf_bucket_limited_readline()

in buckets/buckets.c [128:234]


/* Read up to one line from BUCKET, consuming at most REQUESTED bytes.
 *
 * The bucket is peeked first; the size passed to read() is then trimmed so
 * that the read stops right after the first acceptable line terminator
 * visible in the peeked data (or after the peeked data itself when no
 * terminator is visible).
 *
 * ACCEPTABLE is a bit mask of SERF_NEWLINE_CR / SERF_NEWLINE_LF /
 * SERF_NEWLINE_CRLF selecting which terminators count as a line end.
 *
 * On a successful read, *DATA and *LEN are whatever the bucket's read()
 * produced, and *FOUND is set from the tail of that data:
 *   - SERF_NEWLINE_CRLF        data ends in "\r\n" and CRLF is acceptable
 *   - SERF_NEWLINE_LF          data ends in '\n' and LF is acceptable
 *   - SERF_NEWLINE_CRLF_SPLIT  data ends in '\r' and CRLF is acceptable
 *   - SERF_NEWLINE_CR          data ends in '\r', CR (but not CRLF) acceptable
 *   - SERF_NEWLINE_NONE        anything else, including an empty read
 *
 * Returns the peek()/read() status unchanged when it is a read error (in
 * that case *FOUND is not written), APR_EOF with *LEN == 0 when the peek
 * reports EOF with no data, and the status of read() otherwise.
 */
apr_status_t serf_bucket_limited_readline(serf_bucket_t *bucket, int acceptable,
                                          apr_size_t requested, int *found,
                                          const char **data, apr_size_t *len)
{
    const char *peek_data;
    apr_size_t peek_len;
    apr_status_t status;
    int newline;

    status = bucket->type->peek(bucket, &peek_data, &peek_len);
    if (SERF_BUCKET_READ_ERROR(status))
        return status;

    /* Nothing peeked and the bucket reports EOF: there is nothing to read. */
    if (peek_len == 0 && APR_STATUS_IS_EOF(status)) {
        *found = SERF_NEWLINE_NONE;
        *len = 0;
        return APR_EOF;
    }

    if (peek_len == 0) {
        /* Reading blind.  Normally only one byte may be taken: asking for
           two while hoping for CRLF could yield CR plus the first byte of
           the next line, which would then have been consumed too early.

           When CRLF is the *only* acceptable terminator, a CR followed by
           some other byte is plain line data, so two bytes are safe.  */
        if ((acceptable & SERF_NEWLINE_ANY) == SERF_NEWLINE_CRLF)
            requested = MIN(requested, 2);
        else
            requested = MIN(requested, 1);
    }
    else {
        const char *eol = NULL;    /* earliest acceptable terminator byte */
        const char *window_end;

        /* Never look further than the caller is willing to read. */
        if (requested < peek_len)
            peek_len = requested;
        window_end = peek_data + peek_len;

        if (acceptable & (SERF_NEWLINE_CR | SERF_NEWLINE_CRLF))
            eol = memchr(peek_data, '\r', peek_len);

        if (acceptable & SERF_NEWLINE_LF) {
            const char *lf = memchr(peek_data, '\n', peek_len);

            /* Keep whichever terminator byte comes first in the window. */
            if (lf && (!eol || lf < eol))
                eol = lf;
        }

        /* Note: with a CRLF-only mask a lone CR still ends the chunk here,
           so the caller may receive shorter pieces than strictly needed.
           The result is still correct; callers must cope with partial
           reads regardless.  */
        if (eol == NULL) {
            requested = peek_len;
        }
        else if (*eol == '\r' && (acceptable & SERF_NEWLINE_CRLF)
                 && (eol + 1) < window_end && eol[1] == '\n') {
            /* Complete CRLF pair inside the window: take both bytes. */
            requested = (eol + 2) - peek_data;
        }
        else {
            requested = (eol + 1) - peek_data;
        }
    }

    status = bucket->type->read(bucket, requested, data, len);
    if (SERF_BUCKET_READ_ERROR(status))
        return status;

    /* Classify the terminator from the last byte(s) actually returned. */
    newline = SERF_NEWLINE_NONE;
    if (*len != 0) {
        const char last = (*data)[*len - 1];

        if (last == '\n') {
            if ((acceptable & SERF_NEWLINE_CRLF) && *len >= 2
                && (*data)[*len - 2] == '\r')
                newline = SERF_NEWLINE_CRLF;
            else if (acceptable & SERF_NEWLINE_LF)
                newline = SERF_NEWLINE_LF;
        }
        else if (last == '\r') {
            /* A trailing CR may be the first half of a CRLF whose LF has
               not been read yet; report that when CRLF is acceptable.  */
            if (acceptable & SERF_NEWLINE_CRLF)
                newline = SERF_NEWLINE_CRLF_SPLIT;
            else if (acceptable & SERF_NEWLINE_CR)
                newline = SERF_NEWLINE_CR;
        }
    }
    *found = newline;

    return status;
}