std::unique_ptr GaussianBlur()

in src/gpu/ganesh/GrBlurUtils.cpp [2107:2429]
241 lines of code
48 McCabe index (conditional complexity)

std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> GaussianBlur(GrRecordingContext* rContext,
                                                                GrSurfaceProxyView srcView,
                                                                GrColorType srcColorType,
                                                                SkAlphaType srcAlphaType,
                                                                sk_sp<SkColorSpace> colorSpace,
                                                                SkIRect dstBounds,
                                                                SkIRect srcBounds,
                                                                float sigmaX,
                                                                float sigmaY,
                                                                SkTileMode mode,
                                                                SkBackingFit fit) {
    SkASSERT(rContext);
    TRACE_EVENT2("skia.gpu", "GaussianBlur", "sigmaX", sigmaX, "sigmaY", sigmaY);

    if (!srcView.asTextureProxy()) {
        return nullptr;
    }

    int maxRenderTargetSize = rContext->priv().caps()->maxRenderTargetSize();
    if (dstBounds.width() > maxRenderTargetSize || dstBounds.height() > maxRenderTargetSize) {
        return nullptr;
    }

    int radiusX = skgpu::BlurSigmaRadius(sigmaX);
    int radiusY = skgpu::BlurSigmaRadius(sigmaY);
    // Attempt to reduce the srcBounds in order to detect that we can set the sigmas to zero or
    // to reduce the amount of work to rescale the source if sigmas are large. TODO: Could consider
    // how to minimize the required source bounds for repeat/mirror modes.
    if (mode == SkTileMode::kClamp || mode == SkTileMode::kDecal) {
        SkIRect reach = dstBounds.makeOutset(radiusX, radiusY);
        SkIRect intersection;
        if (!intersection.intersect(reach, srcBounds)) {
            if (mode == SkTileMode::kDecal) {
                return nullptr;
            } else {
                if (reach.fLeft >= srcBounds.fRight) {
                    srcBounds.fLeft = srcBounds.fRight - 1;
                } else if (reach.fRight <= srcBounds.fLeft) {
                    srcBounds.fRight = srcBounds.fLeft + 1;
                }
                if (reach.fTop >= srcBounds.fBottom) {
                    srcBounds.fTop = srcBounds.fBottom - 1;
                } else if (reach.fBottom <= srcBounds.fTop) {
                    srcBounds.fBottom = srcBounds.fTop + 1;
                }
            }
        } else {
            srcBounds = intersection;
        }
    }

    if (mode != SkTileMode::kDecal) {
        // All non-decal tile modes are equivalent for one pixel width/height src and amount to a
        // single color value repeated at each column/row. Applying the normalized kernel to that
        // column/row yields that same color. So no blurring is necessary.
        if (srcBounds.width() == 1) {
            sigmaX = 0.f;
            radiusX = 0;
        }
        if (srcBounds.height() == 1) {
            sigmaY = 0.f;
            radiusY = 0;
        }
    }

    // If we determined that there is no blurring necessary in either direction then just do a
    // a draw that applies the tile mode.
    if (!radiusX && !radiusY) {
        // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
        // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
        auto result =
                skgpu::ganesh::SurfaceDrawContext::Make(rContext,
                                                        srcColorType,
                                                        std::move(colorSpace),
                                                        fit,
                                                        dstBounds.size(),
                                                        SkSurfaceProps(),
                                                        /*label=*/"SurfaceDrawContext_GaussianBlur",
                                                        /* sampleCnt= */ 1,
                                                        skgpu::Mipmapped::kNo,
                                                        srcView.proxy()->isProtected(),
                                                        srcView.origin());
        if (!result) {
            return nullptr;
        }
        GrSamplerState sampler(SkTileModeToWrapMode(mode), GrSamplerState::Filter::kNearest);
        auto fp = GrTextureEffect::MakeSubset(std::move(srcView),
                                              srcAlphaType,
                                              SkMatrix::I(),
                                              sampler,
                                              SkRect::Make(srcBounds),
                                              SkRect::Make(dstBounds),
                                              *rContext->priv().caps());
        result->fillRectToRectWithFP(dstBounds, SkIRect::MakeSize(dstBounds.size()), std::move(fp));
        return result;
    }

    // Any sigma higher than the limit for the 1D linear-filtered Gaussian blur is downsampled. If
    // the sigma in X and Y just so happen to fit in the 2D limit, we'll use that. The 2D limit is
    // always less than the linear blur sigma limit.
    static constexpr float kMaxSigma = skgpu::kMaxLinearBlurSigma;
    if (sigmaX <= kMaxSigma && sigmaY <= kMaxSigma) {
        // For really small blurs (certainly no wider than 5x5 on desktop GPUs) it is faster to just
        // launch a single non separable kernel vs two launches.
        const int kernelSize = skgpu::BlurKernelWidth(radiusX) * skgpu::BlurKernelWidth(radiusY);
        if (radiusX > 0 && radiusY > 0 &&
            kernelSize <= skgpu::kMaxBlurSamples &&
            !rContext->priv().caps()->reducedShaderMode()) {
            // Apply the proxy offset to src bounds and offset directly
            return convolve_gaussian_2d(rContext,
                                        std::move(srcView),
                                        srcColorType,
                                        srcBounds,
                                        dstBounds,
                                        radiusX,
                                        radiusY,
                                        sigmaX,
                                        sigmaY,
                                        mode,
                                        std::move(colorSpace),
                                        fit);
        }
        // This will automatically degenerate into a single pass of X or Y if only one of the
        // radii are non-zero.
        SkASSERT(skgpu::BlurLinearKernelWidth(radiusX) <= skgpu::kMaxBlurSamples &&
                 skgpu::BlurLinearKernelWidth(radiusY) <= skgpu::kMaxBlurSamples);
        return two_pass_gaussian(rContext,
                                 std::move(srcView),
                                 srcColorType,
                                 srcAlphaType,
                                 std::move(colorSpace),
                                 srcBounds,
                                 dstBounds,
                                 sigmaX,
                                 sigmaY,
                                 radiusX,
                                 radiusY,
                                 mode,
                                 fit);
    }

    GrColorInfo colorInfo(srcColorType, srcAlphaType, colorSpace);
    auto srcCtx = rContext->priv().makeSC(srcView, colorInfo);
    SkASSERT(srcCtx);

#if defined(SK_USE_PADDED_BLUR_UPSCALE)
    // When we are in clamp mode any artifacts in the edge pixels due to downscaling may be
    // exacerbated because of the tile mode. The particularly egregious case is when the original
    // image has transparent black around the edges and the downscaling pulls in some non-zero
    // values from the interior. Ultimately it'd be better for performance if the calling code could
    // give us extra context around the blur to account for this. We don't currently have a good way
    // to communicate this up stack. So we leave a 1 pixel border around the rescaled src bounds.
    // We populate the top 1 pixel tall row of this border by rescaling the top row of the original
    // source bounds into it. Because this is only rescaling in x (i.e. rescaling a 1 pixel high
    // row into a shorter but still 1 pixel high row) we won't read any interior values. And similar
    // for the other three borders. We'll adjust the source/dest bounds rescaled blur so that this
    // border of extra pixels is used as the edge pixels for clamp mode but the dest bounds
    // corresponds only to the pixels inside the border (the normally rescaled pixels inside this
    // border).
    // Moreover, if we clamped the rescaled size to 1 column or row then we still have a sigma
    // that is greater than kMaxSigma. By using a pad and making the src 3 wide/tall instead of
    // 1 we can recurse again and do another downscale. Since mirror and repeat modes are trivial
    // for a single col/row we only add padding based on sigma exceeding kMaxSigma for decal.
    int padX = mode == SkTileMode::kClamp || (mode == SkTileMode::kDecal && sigmaX > kMaxSigma) ? 1
                                                                                                : 0;
    int padY = mode == SkTileMode::kClamp || (mode == SkTileMode::kDecal && sigmaY > kMaxSigma) ? 1
                                                                                                : 0;
#endif

    float scaleX = sigmaX > kMaxSigma ? kMaxSigma / sigmaX : 1.f;
    float scaleY = sigmaY > kMaxSigma ? kMaxSigma / sigmaY : 1.f;
    // We round down here so that when we recalculate sigmas we know they will be below
    // kMaxSigma (but clamp to 1 do we don't have an empty texture).
    SkISize rescaledSize = {std::max(sk_float_floor2int(srcBounds.width() * scaleX), 1),
                            std::max(sk_float_floor2int(srcBounds.height() * scaleY), 1)};
    // Compute the sigmas using the actual scale factors used once we integerized the
    // rescaledSize.
    scaleX = static_cast<float>(rescaledSize.width()) / srcBounds.width();
    scaleY = static_cast<float>(rescaledSize.height()) / srcBounds.height();
    sigmaX *= scaleX;
    sigmaY *= scaleY;

#if !defined(SK_USE_PADDED_BLUR_UPSCALE)
    // Historically, padX and padY were calculated after scaling sigmaX,Y, which meant that they
    // would never be greater than kMaxSigma. This causes pixel diffs so must be guarded along with
    // the rest of the padding dst behavior.
    int padX = mode == SkTileMode::kClamp || (mode == SkTileMode::kDecal && sigmaX > kMaxSigma) ? 1
                                                                                                : 0;
    int padY = mode == SkTileMode::kClamp || (mode == SkTileMode::kDecal && sigmaY > kMaxSigma) ? 1
                                                                                                : 0;
#endif

    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
    auto rescaledSDC = skgpu::ganesh::SurfaceDrawContext::Make(
            srcCtx->recordingContext(),
            colorInfo.colorType(),
            colorInfo.refColorSpace(),
            SkBackingFit::kApprox,
            {rescaledSize.width() + 2 * padX, rescaledSize.height() + 2 * padY},
            SkSurfaceProps(),
            /*label=*/"RescaledSurfaceDrawContext",
            /* sampleCnt= */ 1,
            skgpu::Mipmapped::kNo,
            srcCtx->asSurfaceProxy()->isProtected(),
            srcCtx->origin());
    if (!rescaledSDC) {
        return nullptr;
    }
    if ((padX || padY) && mode == SkTileMode::kDecal) {
        rescaledSDC->clear(SkPMColor4f{0, 0, 0, 0});
    }
    if (!srcCtx->rescaleInto(rescaledSDC.get(),
                             SkIRect::MakeSize(rescaledSize).makeOffset(padX, padY),
                             srcBounds,
                             SkSurface::RescaleGamma::kSrc,
                             SkSurface::RescaleMode::kRepeatedLinear)) {
        return nullptr;
    }
    if (mode == SkTileMode::kClamp) {
        SkASSERT(padX == 1 && padY == 1);
        // Rather than run a potentially multi-pass rescaler on single rows/columns we just do a
        // single bilerp draw. If we find this quality unacceptable we should think more about how
        // to rescale these with better quality but without 4 separate multi-pass downscales.
        auto cheapDownscale = [&](SkIRect dstRect, SkIRect srcRect) {
            rescaledSDC->drawTexture(nullptr,
                                     srcCtx->readSurfaceView(),
                                     srcAlphaType,
                                     GrSamplerState::Filter::kLinear,
                                     GrSamplerState::MipmapMode::kNone,
                                     SkBlendMode::kSrc,
                                     SK_PMColor4fWHITE,
                                     SkRect::Make(srcRect),
                                     SkRect::Make(dstRect),
                                     GrQuadAAFlags::kNone,
                                     SkCanvas::SrcRectConstraint::kFast_SrcRectConstraint,
                                     SkMatrix::I(),
                                     nullptr);
        };
        auto [dw, dh] = rescaledSize;
        // The are the src rows and columns from the source that we will scale into the dst padding.
        float sLCol = srcBounds.left();
        float sTRow = srcBounds.top();
        float sRCol = srcBounds.right() - 1;
        float sBRow = srcBounds.bottom() - 1;

        int sx = srcBounds.left();
        int sy = srcBounds.top();
        int sw = srcBounds.width();
        int sh = srcBounds.height();

        // Downscale the edges from the original source. These draws should batch together (and with
        // the above interior rescaling when it is a single pass).
        cheapDownscale(SkIRect::MakeXYWH(0, 1, 1, dh), SkIRect::MakeXYWH(sLCol, sy, 1, sh));
        cheapDownscale(SkIRect::MakeXYWH(1, 0, dw, 1), SkIRect::MakeXYWH(sx, sTRow, sw, 1));
        cheapDownscale(SkIRect::MakeXYWH(dw + 1, 1, 1, dh), SkIRect::MakeXYWH(sRCol, sy, 1, sh));
        cheapDownscale(SkIRect::MakeXYWH(1, dh + 1, dw, 1), SkIRect::MakeXYWH(sx, sBRow, sw, 1));

        // Copy the corners from the original source. These would batch with the edges except that
        // at time of writing we recognize these can use kNearest and downgrade the filter. So they
        // batch with each other but not the edge draws.
        cheapDownscale(SkIRect::MakeXYWH(0, 0, 1, 1), SkIRect::MakeXYWH(sLCol, sTRow, 1, 1));
        cheapDownscale(SkIRect::MakeXYWH(dw + 1, 0, 1, 1), SkIRect::MakeXYWH(sRCol, sTRow, 1, 1));
        cheapDownscale(SkIRect::MakeXYWH(dw + 1, dh + 1, 1, 1),
                       SkIRect::MakeXYWH(sRCol, sBRow, 1, 1));
        cheapDownscale(SkIRect::MakeXYWH(0, dh + 1, 1, 1), SkIRect::MakeXYWH(sLCol, sBRow, 1, 1));
    }
    srcView = rescaledSDC->readSurfaceView();
    // Drop the contexts so we don't hold the proxies longer than necessary.
    rescaledSDC.reset();
    srcCtx.reset();

    // Compute the dst bounds in the scaled down space. First move the origin to be at the top
    // left since we trimmed off everything above and to the left of the original src bounds during
    // the rescale.
    SkRect scaledDstBounds = SkRect::Make(dstBounds.makeOffset(-srcBounds.topLeft()));
    scaledDstBounds.fLeft *= scaleX;
    scaledDstBounds.fTop *= scaleY;
    scaledDstBounds.fRight *= scaleX;
    scaledDstBounds.fBottom *= scaleY;
    // Account for padding in our rescaled src, if any.
    scaledDstBounds.offset(padX, padY);
    // Turn the scaled down dst bounds into an integer pixel rect, adding 1px of padding to help
    // with boundary sampling during re-expansion when there are extreme scale factors. This is
    // particularly important when the blurs extend across Chrome raster tiles; w/o it the re-expand
    // produces visible seams: crbug.com/1500021.
#if defined(SK_USE_PADDED_BLUR_UPSCALE)
    static constexpr int kDstPadding = 1;
#else
    static constexpr int kDstPadding = 0;
#endif
    auto scaledDstBoundsI = scaledDstBounds.roundOut();
    scaledDstBoundsI.outset(kDstPadding, kDstPadding);

    SkIRect scaledSrcBounds = SkIRect::MakeSize(srcView.dimensions());
    auto sdc = GaussianBlur(rContext,
                            std::move(srcView),
                            srcColorType,
                            srcAlphaType,
                            colorSpace,
                            scaledDstBoundsI,
                            scaledSrcBounds,
                            sigmaX,
                            sigmaY,
                            mode,
                            fit);
    if (!sdc) {
        return nullptr;
    }

    SkASSERT(sdc->width() == scaledDstBoundsI.width() &&
             sdc->height() == scaledDstBoundsI.height());
    // We rounded out the integer scaled dst bounds. Select the fractional dst bounds from the
    // integer dimension blurred result when we scale back up. This also accounts for the padding
    // added to 'scaledDstBoundsI' when sampling from the blurred result.
    scaledDstBounds.offset(-scaledDstBoundsI.left(), -scaledDstBoundsI.top());
    return reexpand(rContext,
                    std::move(sdc),
                    scaledDstBounds,
                    dstBounds.size(),
                    std::move(colorSpace),
                    fit);
}