static apr_status_t deflate_out_filter()

in modules/filters/mod_deflate.c [562:1046]


/*
 * Output filter that gzip-compresses the response body with zlib.
 *
 * First invocation (no filter context yet): decide whether the response may
 * be compressed.  The filter removes itself and passes bb through unchanged
 * when
 *   - compression is already enabled at the SSL level,
 *   - the brigade ends with EOS and the body is no larger than the gzip
 *     overhead (header + trailer + ~50 bytes of extra response headers),
 *   - the request is a subrequest, the status is 204, "no-gzip" is set in
 *     the environment, or a Content-Range header is present,
 *   - "gzip-only-text/html" is "1" and the content type is not text/html,
 *   - a Content-Encoding other than identity/7bit/8bit/binary is already set,
 *   - the client did not offer gzip in Accept-Encoding or gave it q=0
 *     (skipped when "force-gzip" is set), or
 *   - deflateInit2() fails.
 * Otherwise the response headers are adjusted (Content-Encoding, Vary,
 * Content-Length, Content-MD5 and, depending on configuration, ETag).  A 304
 * response only gets the header changes; for everything else the gzip header
 * is queued and the deflate output buffer is set up.
 *
 * Every invocation with an initialized context: the buckets of bb are
 * consumed one by one.  Data is fed to deflate(), FLUSH buckets trigger a
 * Z_SYNC_FLUSH and are passed on, other metadata buckets are moved to the
 * output brigade, and EOS finishes the zlib stream, appends the CRC32 and
 * input-length trailer, records the logging notes and removes the filter.
 *
 * f   filter instance; f->ctx carries the deflate_ctx between invocations
 * bb  brigade to filter
 *
 * Returns APR_SUCCESS, the status of ap_pass_brigade() or of a failed bucket
 * read/split, or APR_EGENERAL when zlib reports an error.
 */
static apr_status_t deflate_out_filter(ap_filter_t *f,
                                       apr_bucket_brigade *bb)
{
    apr_bucket *e;
    request_rec *r = f->r;
    deflate_ctx *ctx = f->ctx;
    int zRC;
    apr_status_t rv;
    apr_size_t len = 0, blen;
    const char *data;
    deflate_filter_config *c;

    /* Do nothing if asked to filter nothing. */
    if (APR_BRIGADE_EMPTY(bb)) {
        return APR_SUCCESS;
    }

    c = ap_get_module_config(r->server->module_config,
                             &deflate_module);

    /* If we don't have a context, we need to ensure that it is okay to send
     * the deflated content.  If we have a context, that means we've done
     * this before and we liked it.
     * This could be not so nice if we always fail.  But, if we succeed,
     * we're in better shape.
     */
    if (!ctx) {
        char *token;
        const char *encoding;

        if (have_ssl_compression(r)) {
            ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
                          "Compression enabled at SSL level; not compressing "
                          "at HTTP level.");
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }

        /* We have checked above that bb is not empty */
        e = APR_BRIGADE_LAST(bb);
        if (APR_BUCKET_IS_EOS(e)) {
            /*
             * If we already know the size of the response, we can skip
             * compression on responses smaller than the compression overhead.
             * However, if we compress, we must initialize deflate_out before
             * calling ap_pass_brigade() for the first time.  Otherwise the
             * headers will be sent to the client without
             * "Content-Encoding: gzip".
             */
            e = APR_BRIGADE_FIRST(bb);
            while (1) {
                apr_status_t rc;
                if (APR_BUCKET_IS_EOS(e)) {
                    /* Reached EOS without exceeding the threshold below. */
                    ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
                                  "Not compressing very small response of %"
                                  APR_SIZE_T_FMT " bytes", len);
                    ap_remove_output_filter(f);
                    return ap_pass_brigade(f->next, bb);
                }
                if (APR_BUCKET_IS_METADATA(e)) {
                    e = APR_BUCKET_NEXT(e);
                    continue;
                }

                /* Only read the bucket when its length is not yet known. */
                if (e->length == (apr_size_t)-1) {
                    rc = apr_bucket_read(e, &data, &blen, APR_BLOCK_READ);
                    if (rc != APR_SUCCESS)
                        return rc;
                }
                else {
                    blen = e->length;
                }
                len += blen;
                /* 50 is for Content-Encoding and Vary headers and ETag suffix */
                if (len > sizeof(gzip_header) + VALIDATION_SIZE + 50)
                    break;

                e = APR_BUCKET_NEXT(e);
            }
        }

        /*
         * From here on a context exists even if we decide not to compress;
         * ctx->filter_init stays 0 in that case (apr_pcalloc zeroes it).
         */
        ctx = f->ctx = apr_pcalloc(r->pool, sizeof(*ctx));

        /*
         * Only work on main request, not subrequests,
         * that are not a 204 response with no content
         * and are not tagged with the no-gzip env variable
         * and not a partial response to a Range request.
         *
         * Note that responding to 304 is handled separately to
         * set the required headers (such as ETag) per RFC7232, 4.1.
         */
        if ((r->main != NULL) || (r->status == HTTP_NO_CONTENT) ||
            apr_table_get(r->subprocess_env, "no-gzip") ||
            apr_table_get(r->headers_out, "Content-Range")
           ) {
            if (APLOG_R_IS_LEVEL(r, APLOG_TRACE1)) {
                const char *reason =
                    (r->main != NULL)                           ? "subrequest" :
                    (r->status == HTTP_NO_CONTENT)              ? "no content" :
                    apr_table_get(r->subprocess_env, "no-gzip") ? "no-gzip" :
                    "content-range";
                ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
                              "Not compressing (%s)", reason);
            }
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }

        /* Some browsers might have problems with content types
         * other than text/html, so set gzip-only-text/html
         * (with browsermatch) for them
         */
        if (r->content_type == NULL
             || strncmp(r->content_type, "text/html", 9)) {
            const char *env_value = apr_table_get(r->subprocess_env,
                                                  "gzip-only-text/html");
            if ( env_value && (strcmp(env_value,"1") == 0) ) {
                ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
                              "Not compressing, (gzip-only-text/html)");
                ap_remove_output_filter(f);
                return ap_pass_brigade(f->next, bb);
            }
        }

        /* Let's see what our current Content-Encoding is.
         * If it's already encoded, don't compress again.
         * (We could, but let's not.)
         *
         * The value is collected from headers_out, err_headers_out and
         * r->content_encoding and joined into one comma separated list.
         */
        encoding = apr_table_get(r->headers_out, "Content-Encoding");
        if (encoding) {
            const char *err_enc;

            err_enc = apr_table_get(r->err_headers_out, "Content-Encoding");
            if (err_enc) {
                encoding = apr_pstrcat(r->pool, encoding, ",", err_enc, NULL);
            }
        }
        else {
            encoding = apr_table_get(r->err_headers_out, "Content-Encoding");
        }

        if (r->content_encoding) {
            encoding = encoding ? apr_pstrcat(r->pool, encoding, ",",
                                              r->content_encoding, NULL)
                                : r->content_encoding;
        }

        if (encoding) {
            const char *tmp = encoding;

            token = ap_get_token(r->pool, &tmp, 0);
            while (token && *token) {
                /* stolen from mod_negotiation: */
                if (strcmp(token, "identity") && strcmp(token, "7bit") &&
                    strcmp(token, "8bit") && strcmp(token, "binary")) {
                    ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
                                  "Not compressing (content-encoding already "
                                  " set: %s)", token);
                    ap_remove_output_filter(f);
                    return ap_pass_brigade(f->next, bb);
                }

                /* Otherwise, skip token */
                if (*tmp) {
                    ++tmp;
                }
                token = (*tmp) ? ap_get_token(r->pool, &tmp, 0) : NULL;
            }
        }

        /* Even if we don't accept this request based on it not having
         * the Accept-Encoding, we need to note that we were looking
         * for this header and downstream proxies should be aware of that.
         */
        apr_table_mergen(r->headers_out, "Vary", "Accept-Encoding");

        /* force-gzip will just force it out regardless if the browser
         * can actually do anything with it.
         */
        if (!apr_table_get(r->subprocess_env, "force-gzip")) {
            const char *accepts;
            const char *q = NULL;

            /* if they don't have the line, then they can't play */
            accepts = apr_table_get(r->headers_in, "Accept-Encoding");
            if (accepts == NULL) {
                ap_remove_output_filter(f);
                return ap_pass_brigade(f->next, bb);
            }

            /* Walk the list until a "gzip" token is found or it is exhausted */
            token = ap_get_token(r->pool, &accepts, 0);
            while (token && token[0] && ap_cstr_casecmp(token, "gzip")) {
                /* skip parameters, XXX: ;q=foo evaluation? */
                while (*accepts == ';') {
                    ++accepts;
                    ap_get_token(r->pool, &accepts, 1);
                }

                /* retrieve next token */
                if (*accepts == ',') {
                    ++accepts;
                }
                token = (*accepts) ? ap_get_token(r->pool, &accepts, 0) : NULL;
            }

            /* Find the qvalue, if provided */
            if (*accepts) {
                while (*accepts == ';') {
                    ++accepts;
                }
                q = ap_get_token(r->pool, &accepts, 1);
                ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
                              "token: '%s' - q: '%s'", token ? token : "NULL", q);
            }

            /* No acceptable token found or q=0
             * (q matches any prefix of "q=0.000" that is at least "q=0")
             */
            if (!token || token[0] == '\0' ||
                (q && strlen(q) >= 3 && strncmp("q=0.000", q, strlen(q)) == 0)) {
                ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
                              "Not compressing (no Accept-Encoding: gzip or q=0)");
                ap_remove_output_filter(f);
                return ap_pass_brigade(f->next, bb);
            }
        }
        else {
            ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
                          "Forcing compression (force-gzip set)");
        }

        /* At this point we have decided to filter the content. Let's try
         * to initialize zlib (except for 304 responses, where we will only
         * send out the headers).
         */

        if (r->status != HTTP_NOT_MODIFIED) {
            ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
            ctx->buffer = apr_palloc(r->pool, c->bufferSize);
            ctx->libz_end_func = deflateEnd;

            zRC = deflateInit2(&ctx->stream, c->compressionlevel, Z_DEFLATED,
                               c->windowSize, c->memlevel,
                               Z_DEFAULT_STRATEGY);

            if (zRC != Z_OK) {
                deflateEnd(&ctx->stream);
                ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01383)
                              "unable to init Zlib: "
                              "deflateInit2 returned %d: URL %s",
                              zRC, r->uri);
                /*
                 * Remove ourselves as it does not make sense to return:
                 * We are not able to init libz and pass data down the chain
                 * uncompressed.
                 */
                ap_remove_output_filter(f);
                return ap_pass_brigade(f->next, bb);
            }
            /*
             * Register a cleanup function to ensure that we cleanup the internal
             * libz resources.
             */
            apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup,
                                      apr_pool_cleanup_null);

            /* Set the filter init flag so subsequent invocations know we are
             * active.
             */
            ctx->filter_init = 1;
        }

        /*
         * Zlib initialization worked, so we can now change the important
         * content metadata before sending the response out.
         */

        /* If the entire Content-Encoding is "identity", we can replace it. */
        if (!encoding || !ap_cstr_casecmp(encoding, "identity")) {
            apr_table_setn(r->headers_out, "Content-Encoding", "gzip");
        }
        else {
            apr_table_mergen(r->headers_out, "Content-Encoding", "gzip");
        }
        /* Fix r->content_encoding if it was set before */
        if (r->content_encoding) {
            r->content_encoding = apr_table_get(r->headers_out,
                                                "Content-Encoding");
        }
        /* The compressed body no longer matches these headers. */
        apr_table_unset(r->headers_out, "Content-Length");
        apr_table_unset(r->headers_out, "Content-MD5");
        if (c->etag_opt != AP_DEFLATE_ETAG_NOCHANGE) {
            deflate_check_etag(r, "gzip", c->etag_opt);
        }

        /* For a 304 response, only change the headers */
        if (r->status == HTTP_NOT_MODIFIED) {
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }

        /* add immortal gzip header */
        e = apr_bucket_immortal_create(gzip_header, sizeof gzip_header,
                                       f->c->bucket_alloc);
        APR_BRIGADE_INSERT_TAIL(ctx->bb, e);

        /* initialize deflate output buffer */
        ctx->stream.next_out = ctx->buffer;
        ctx->stream.avail_out = c->bufferSize;
    } else if (!ctx->filter_init) {
        /* Hmm.  We've run through the filter init before as we have a ctx,
         * but we never initialized.  We probably have a dangling ref.  Bail.
         */
        return ap_pass_brigade(f->next, bb);
    }

    while (!APR_BRIGADE_EMPTY(bb))
    {
        apr_bucket *b;

        /*
         * Optimization: If we are a HEAD request and bytes_sent is not zero
         * it means that we have passed the content-length filter once and
         * have more data to send. This means that the content-length filter
         * could not determine our content-length for the response to the
         * HEAD request anyway (the associated GET request would deliver the
         * body in chunked encoding) and we can stop compressing.
         */
        if (r->header_only && r->bytes_sent) {
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }

        e = APR_BRIGADE_FIRST(bb);

        if (APR_BUCKET_IS_EOS(e)) {
            char *buf;

            ctx->stream.avail_in = 0; /* should be zero already anyway */
            /* flush the remaining data from the zlib buffers */
            flush_libz_buffer(ctx, c, deflate, Z_FINISH, NO_UPDATE_CRC);

            /* Trailer: CRC32 of the input followed by the input length. */
            buf = apr_palloc(r->pool, VALIDATION_SIZE);
            putLong((unsigned char *)&buf[0], ctx->crc);
            putLong((unsigned char *)&buf[4], ctx->stream.total_in);

            b = apr_bucket_pool_create(buf, VALIDATION_SIZE, r->pool,
                                       f->c->bucket_alloc);
            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01384)
                          "Zlib: Compressed %" APR_UINT64_T_FMT
                          " to %" APR_UINT64_T_FMT " : URL %s",
                          (apr_uint64_t)ctx->stream.total_in,
                          (apr_uint64_t)ctx->stream.total_out, r->uri);

            /* leave notes for logging */
            if (c->note_input_name) {
                apr_table_setn(r->notes, c->note_input_name,
                               (ctx->stream.total_in > 0)
                                ? apr_off_t_toa(r->pool,
                                                ctx->stream.total_in)
                                : "-");
            }

            if (c->note_output_name) {
                apr_table_setn(r->notes, c->note_output_name,
                               (ctx->stream.total_out > 0)
                                ? apr_off_t_toa(r->pool,
                                                ctx->stream.total_out)
                                : "-");
            }

            if (c->note_ratio_name) {
                /* Ratio is output size as an integer percentage of input. */
                apr_table_setn(r->notes, c->note_ratio_name,
                               (ctx->stream.total_in > 0)
                                ? apr_itoa(r->pool,
                                           (int)(ctx->stream.total_out
                                                 * 100
                                                 / ctx->stream.total_in))
                                : "-");
            }

            deflateEnd(&ctx->stream);

            /* We've ended the libz stream, so remove ourselves. */
            ap_remove_output_filter(f);

            /* No need for cleanup any longer */
            apr_pool_cleanup_kill(r->pool, ctx, deflate_ctx_cleanup);

            /* Remove EOS from the old list, and insert into the new. */
            APR_BUCKET_REMOVE(e);
            APR_BRIGADE_INSERT_TAIL(ctx->bb, e);

            /* Okay, we've seen the EOS.
             * Time to pass it along down the chain.
             */
            rv = ap_pass_brigade(f->next, ctx->bb);
            apr_brigade_cleanup(ctx->bb);
            return rv;
        }

        if (APR_BUCKET_IS_FLUSH(e)) {
            /* flush the remaining data from the zlib buffers */
            zRC = flush_libz_buffer(ctx, c, deflate, Z_SYNC_FLUSH,
                                    NO_UPDATE_CRC);
            if (zRC != Z_OK) {
                ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01385)
                              "Zlib error %d flushing zlib output buffer (%s)",
                              zRC, ctx->stream.msg);
                return APR_EGENERAL;
            }

            /* Remove flush bucket from old brigade and insert into the new. */
            APR_BUCKET_REMOVE(e);
            APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
            rv = ap_pass_brigade(f->next, ctx->bb);
            apr_brigade_cleanup(ctx->bb);
            if (rv != APR_SUCCESS) {
                return rv;
            }
            continue;
        }

        if (APR_BUCKET_IS_METADATA(e)) {
            /*
             * Remove meta data bucket from old brigade and insert into the
             * new.
             */
            APR_BUCKET_REMOVE(e);
            APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
            continue;
        }

        /* read */
        rv = apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
        if (rv) {
            ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(10298)
                          "failed reading from %s bucket", e->type->name);
            return rv;
        }
        if (!len) {
            apr_bucket_delete(e);
            continue;
        }
        if (len > APR_INT32_MAX) {
            /*
             * len is narrowed to int for avail_in below, so limit this
             * bucket to APR_INT32_MAX bytes and re-read it; the remainder is
             * expected to stay in bb as the following bucket.  Bail out if
             * the split or the re-read fails, otherwise data/len would not
             * describe what we are about to deflate.
             */
            rv = apr_bucket_split(e, APR_INT32_MAX);
            if (rv != APR_SUCCESS) {
                return rv;
            }
            rv = apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
            if (rv != APR_SUCCESS) {
                return rv;
            }
        }

        /* This crc32 function is from zlib. */
        ctx->crc = crc32(ctx->crc, (const Bytef *)data, len);

        /* write */
        ctx->stream.next_in = (unsigned char *)data; /* We just lost const-ness,
                                                      * but we'll just have to
                                                      * trust zlib */
        ctx->stream.avail_in = (int)len;

        while (ctx->stream.avail_in != 0) {
            if (ctx->stream.avail_out == 0) {
                /* Output buffer is full: hand it over and start afresh. */
                consume_buffer(ctx, c, c->bufferSize, NO_UPDATE_CRC, ctx->bb);

                /* Send what we have right now to the next filter. */
                rv = ap_pass_brigade(f->next, ctx->bb);
                apr_brigade_cleanup(ctx->bb);
                if (rv != APR_SUCCESS) {
                    return rv;
                }
            }

            zRC = deflate(&(ctx->stream), Z_NO_FLUSH);

            if (zRC != Z_OK) {
                ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01386)
                              "Zlib error %d deflating data (%s)", zRC,
                              ctx->stream.msg);
                return APR_EGENERAL;
            }
        }

        /* The bucket's data has been fully consumed by zlib. */
        apr_bucket_delete(e);
    }

    return APR_SUCCESS;
}