export async function checkCallResults()

in src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts [2311:2724]


export async function checkCallResults<T extends Dimensionality>(
  t: GPUTest,
  softwareTexture: SoftwareTexture,
  textureType: string,
  sampler: GPUSamplerDescriptor | undefined,
  calls: TextureCall<T>[],
  results: Awaited<ReturnType<typeof doTextureCalls<T>>>,
  shortShaderStage: ShortShaderStage,
  gpuTexture?: GPUTexture
) {
  const stage = kShortShaderStageToShaderStage[shortShaderStage];
  if (builtinNeedsMipLevelWeights(calls[0].builtin)) {
    await initMipLevelWeightsForDevice(t, stage);
  }

  let haveComparisonCheckInfo = false;
  let checkInfo = {
    runner: results.runner,
    calls,
    sampler,
  };
  // These are only read if the tests fail. They are used to get the values from the
  // GPU texture for displaying in diagnostics.
  let gpuTexels: TexelView[] | undefined;
  const errs: string[] = [];
  const format = softwareTexture.texels[0].format;
  const size = reifyExtent3D(softwareTexture.descriptor.size);
  const maxFractionalDiff =
    sampler?.minFilter === 'linear' ||
    sampler?.magFilter === 'linear' ||
    sampler?.mipmapFilter === 'linear'
      ? getMaxFractionalDiffForTextureFormat(softwareTexture.descriptor.format)
      : 0;

  for (let callIdx = 0; callIdx < calls.length; callIdx++) {
    const call = calls[callIdx];
    const gotRGBA = results.results[callIdx];
    const expectRGBA = softwareTextureRead(t, stage, call, softwareTexture, sampler);
    // Issues with textureSampleBias
    //
    // textureSampleBias tests start to get unexpected results when bias >= ~12,
    // at which point the mip level selected by the GPU is off by +/- 0.41.
    //
    // The issue is probably an internal precision issue. In order to test a bias of 12
    // we choose a target mip level between 0 and mipLevelCount - 1, for example 0.6.
    // We then compute what mip level we need the derivatives to select such that when
    // we add in the bias it will result in a mip level of 0.6. For a bias of 12
    // that means we need the derivatives to select mip level -11.4. That means
    // the derivatives are `pow(2, -11.4) / textureSize`, so for a texture that's 16
    // pixels wide that's `0.00002312799936691891`. I'm just guessing some of that
    // precision gets rounded off somewhere, leading to the error. For example, if we round it ourselves:
    //
    // | derivative             | mip level |
    // +------------------------+-----------+
    // | 0.00002312799936691891 | -11.4     |
    // | 0.000022               | -11.47    |
    // | 0.000023               | -11.408   |
    // | 0.000024               | -11.34    |
    // +------------------------+-----------+
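    //
    // As a rough sanity check, a sketch (assuming the usual mip selection formula
    // `mipLevel = log2(derivative * textureWidth)`, with textureWidth = 16 as above)
    // reproduces the table:
    //
    //     const mipLevelFromDerivative = (ddx: number, textureWidth: number) =>
    //       Math.log2(ddx * textureWidth);
    //     mipLevelFromDerivative(0.00002312799936691891, 16); // ≈ -11.400
    //     mipLevelFromDerivative(0.000022, 16);               // ≈ -11.472
    //     mipLevelFromDerivative(0.000023, 16);               // ≈ -11.408
    //     mipLevelFromDerivative(0.000024, 16);               // ≈ -11.347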
    //
    // Note: As an example of a bad case: set `callSpecificMaxFractionalDiff = maxFractionalDiff` below
    // then run `webgpu:shader,execution,expression,call,builtin,textureSampleBias:sampled_2d_coords:format="astc-6x6-unorm";filt="linear";modeU="m";modeV="m";offset=false`
    // on an M1 Mac.
    //
    // ```
    // EXPECTATION FAILED: subcase: samplePoints="spiral"
    // result was not as expected:
    //       size: [18, 18, 1]
    //   mipCount: 3
    //       call: textureSampleBias(texture: T, sampler: S, coords: vec2f(0.1527777777777778, 1.4166666666666667) + derivativeBase * derivativeMult(vec2f(0.00002249990733551491, 0)), bias: f32(15.739721414633095))  // #32
    //           : as texel coord @ mip level[0]: (2.750, 25.500)
    //           : as texel coord @ mip level[1]: (1.375, 12.750)
    //           : as texel coord @ mip level[2]: (0.611, 5.667)
    // implicit derivative based mip level: -15.439721414633095 (without bias)
    //                        clamped bias: 15.739721414633095
    //                 mip level with bias: 0.3000000000000007
    //        got: 0.555311381816864, 0.7921856045722961, 0.8004884123802185, 0.38046398758888245
    //   expected: 0.6069580801937625, 0.7999182825318225, 0.8152446179041957, 0.335314491045024
    //   max diff: 0.027450980392156862
    //  abs diffs: 0.0516466983768985, 0.007732677959526368, 0.014756205523977162, 0.04514949654385847
    //  rel diffs: 8.51%, 0.97%, 1.81%, 11.87%
    //  ulp diffs: 866488, 129733, 247568, 1514966
    //
    //   sample points:
    // expected:                                                                   | got:
    // ...
    // a: mip(0) at: [ 2, 10,  0], weight: 0.52740                                 | a: mip(0) at: [ 2, 10,  0], weight: 0.60931
    // b: mip(0) at: [ 3, 10,  0], weight: 0.17580                                 | b: mip(0) at: [ 3, 10,  0], weight: 0.20319
    // a: value: R: 0.46642, G: 0.77875, B: 0.77509, A: 0.45788                    | a: value: R: 0.46642, G: 0.77875, B: 0.77509, A: 0.45788
    // b: value: R: 0.46642, G: 0.77875, B: 0.77509, A: 0.45788                    | b: value: R: 0.46642, G: 0.77875, B: 0.77509, A: 0.45788
    // mip level (0) weight: 0.70320                                               | mip level (0) weight: 0.81250
    // ```
    //
    // Notice above that the "expected" level weight (0.7) matches the "mip level with bias" (0.3), which is
    // the mip level we expected the GPU to select. Selecting mip level 0.3 will do `mix(level0, level1, 0.3)`,
    // which is 0.7 of level 0 and 0.3 of level 1. Notice the "got" level weight is 0.81, which is pretty far off.
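    // (For reference: mix(a, b, t) = a * (1 - t) + b * t, so t = 0.3 weights level 0 by 0.7.)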
    //
    // Just looking at the failures, scaling the tolerance with the formula below makes most of the tests pass.
    //
    // MAINTENANCE_TODO: Consider different solutions for this issue
    //
    // 1. Try to figure out what the exact rounding issue is, then take it into account.
    //
    // 2. The code currently samples the texture once via the GPU and once via softwareTextureRead. These values are
    //    "got:" and "expected:" above. The test only fails if they are too different. We could rather get the bilinear
    //    sample from every mip level and then check the "got" value is between 2 of the levels (or equal if nearest).
    //    In other words:
    //
    //        if (bias >= 12)
    //          colorForEachMipLevel = range(mipLevelCount, mipLevel => softwareTextureReadLevel(..., mipLevel))
    //          if nearest
    //            pass = got === one of colorForEachMipLevel
    //          else // linear
    //            pass = false;
    //            for (i = 0; !pass && i < mipLevelCount - 1; ++i)
    //              pass = got is between colorForEachMipLevel[i] and colorForEachMipLevel[i + 1]
    //
    //    This would check "something", but effectively it would no longer be checking "bias" for values > 12,
    //    only that textureSampleBias returns some plausible answer vs a completely wrong one.
    //
    // 3. It's possible this check is just not possible given the precision required. We could just check bias -16 to 12
    //    and ignore values > 12. We won't be able to test clamping but maybe that's irrelevant.
    //
    const callSpecificMaxFractionalDiff =
      call.bias! >= 12 ? maxFractionalDiff * (2 + call.bias! - 12) : maxFractionalDiff;
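    // For example, at bias 12 the allowed diff is 2 * maxFractionalDiff and at bias 14
    // it's 4 * maxFractionalDiff (an empirically chosen linear widening, per the note above).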

    // The spec says depth and stencil have implementation-defined values for G, B, and A,
    // so if this is `textureGather` and component > 0 then there's nothing to check.
    if (
      isDepthOrStencilTextureFormat(format) &&
      isBuiltinGather(call.builtin) &&
      call.component! > 0
    ) {
      continue;
    }

    if (
      texelsApproximatelyEqual(
        gotRGBA,
        softwareTexture.descriptor.format,
        expectRGBA,
        format,
        callSpecificMaxFractionalDiff
      )
    ) {
      continue;
    }

    if (
      !sampler &&
      okBecauseOutOfBounds(softwareTexture, call, gotRGBA, callSpecificMaxFractionalDiff)
    ) {
      continue;
    }

    const gULP = getULPFromZeroForComponents(gotRGBA, format, call.builtin, call.component);
    const eULP = getULPFromZeroForComponents(expectRGBA, format, call.builtin, call.component);

    // from the spec: https://gpuweb.github.io/gpuweb/#reading-depth-stencil
    // depth and stencil values are D, ?, ?, ?
    const rgbaComponentsToCheck =
      isBuiltinGather(call.builtin) || !isDepthOrStencilTextureFormat(format)
        ? kRGBAComponents
        : kRComponent;

    let bad = false;
    const diffs = rgbaComponentsToCheck.map(component => {
      const g = gotRGBA[component]!;
      const e = expectRGBA[component]!;
      const absDiff = Math.abs(g - e);
      const ulpDiff = Math.abs(gULP[component]! - eULP[component]!);
      assert(!Number.isNaN(ulpDiff));
      const maxAbs = Math.max(Math.abs(g), Math.abs(e));
      const relDiff = maxAbs > 0 ? absDiff / maxAbs : 0;
      if (ulpDiff > 3 && absDiff > callSpecificMaxFractionalDiff) {
        bad = true;
      }
      return { absDiff, relDiff, ulpDiff };
    });

    const isFloatType = (format: GPUTextureFormat) => {
      const type = getTextureFormatType(format);
      return type === 'float' || type === 'depth';
    };
    const fix5 = (n: number) => (isFloatType(format) ? n.toFixed(5) : n.toString());
    const fix5v = (arr: number[]) => arr.map(v => fix5(v)).join(', ');
    const rgbaToArray = (p: PerTexelComponent<number>): number[] =>
      rgbaComponentsToCheck.map(component => p[component]!);

    if (bad) {
      const { baseMipLevel, mipLevelCount, baseArrayLayer, arrayLayerCount, baseMipLevelSize } =
        getBaseMipLevelInfo(softwareTexture);
      const physicalMipLevelCount = softwareTexture.descriptor.mipLevelCount ?? 1;

      const desc = describeTextureCall(call);
      errs.push(`result was not as expected:
   physical size: [${size.width}, ${size.height}, ${size.depthOrArrayLayers}]
    baseMipLevel: ${baseMipLevel}
   mipLevelCount: ${mipLevelCount}
  baseArrayLayer: ${baseArrayLayer}
 arrayLayerCount: ${arrayLayerCount}
physicalMipCount: ${physicalMipLevelCount}
            call: ${desc}  // #${callIdx}`);
      if (isCubeViewDimension(softwareTexture.viewDescriptor)) {
        const coord = convertCubeCoordToNormalized3DTextureCoord(call.coords as vec3);
        const faceNdx = Math.floor(coord[2] * 6);
        errs.push(`          : as 3D texture coord: (${coord[0]}, ${coord[1]}, ${coord[2]})`);
        for (let mipLevel = 0; mipLevel < physicalMipLevelCount; ++mipLevel) {
          const mipSize = virtualMipSize(
            softwareTexture.descriptor.dimension ?? '2d',
            softwareTexture.descriptor.size,
            mipLevel
          );
          const t = coord.slice(0, 2).map((v, i) => (v * mipSize[i]).toFixed(3));
          errs.push(
            `          : as texel coord mip level[${mipLevel}]: (${t[0]}, ${t[1]}), face: ${faceNdx}(${kFaceNames[faceNdx]})`
          );
        }
      } else if (call.coordType === 'f') {
        for (let mipLevel = 0; mipLevel < physicalMipLevelCount; ++mipLevel) {
          const mipSize = virtualMipSize(
            softwareTexture.descriptor.dimension ?? '2d',
            softwareTexture.descriptor.size,
            mipLevel
          );
          const t = call.coords!.map((v, i) => (v * mipSize[i]).toFixed(3));
          errs.push(`          : as texel coord @ mip level[${mipLevel}]: (${t.join(', ')})`);
        }
      }
      if (builtinNeedsDerivatives(call.builtin)) {
        const ddx = derivativeForCall<T>(softwareTexture, call, true);
        const ddy = derivativeForCall<T>(softwareTexture, call, false);
        const mipLevel = computeMipLevelFromGradients(ddx, ddy, baseMipLevelSize);
        const biasStr = call.bias === undefined ? '' : ' (without bias)';
        errs.push(`implicit derivative based mip level: ${fix5(mipLevel)}${biasStr}`);
        if (call.bias) {
          const clampedBias = clamp(call.bias ?? 0, { min: -16.0, max: 15.99 });
          errs.push(`\
                       clamped bias: ${fix5(clampedBias)}
                mip level with bias: ${fix5(mipLevel + clampedBias)}`);
        }
      } else if (call.ddx) {
        const mipLevel = computeMipLevelFromGradientsForCall(call, size);
        errs.push(`gradient based mip level: ${mipLevel}`);
      }
      errs.push(`\
       got: ${fix5v(rgbaToArray(gotRGBA))}
  expected: ${fix5v(rgbaToArray(expectRGBA))}
  max diff: ${callSpecificMaxFractionalDiff}
 abs diffs: ${fix5v(diffs.map(({ absDiff }) => absDiff))}
 rel diffs: ${diffs.map(({ relDiff }) => `${(relDiff * 100).toFixed(2)}%`).join(', ')}
 ulp diffs: ${diffs.map(({ ulpDiff }) => ulpDiff).join(', ')}
`);

      if (sampler) {
        if (t.rec.debugging) {
          // For compares, we can't use the builtin (textureXXXCompareXXX) because it only
          // returns 0 or 1 or the average of 0 and 1 for multiple samples. And, for example,
          // if the comparison is `always` then every sample returns 1. So we need to use the
          // corresponding sample function to get the actual values from the textures
          //
          // textureSampleCompare -> textureSample
          // textureSampleCompareLevel -> textureSampleLevel
          // textureGatherCompare -> textureGather
          if (isBuiltinComparison(call.builtin)) {
            if (!haveComparisonCheckInfo) {
              // Convert the comparison calls to their corresponding non-comparison call
              const debugCalls = calls.map(call => {
                const debugCall = { ...call };
                debugCall.depthRef = undefined;
                switch (call.builtin) {
                  case 'textureGatherCompare':
                    debugCall.builtin = 'textureGather';
                    break;
                  case 'textureSampleCompare':
                    debugCall.builtin = 'textureSample';
                    break;
                  case 'textureSampleCompareLevel':
                    debugCall.builtin = 'textureSampleLevel';
                    debugCall.levelType = 'u';
                    debugCall.mipLevel = 0;
                    break;
                  default:
                    unreachable();
                }
                return debugCall;
              });

              // Convert the comparison sampler to a non-comparison sampler
              const debugSampler = { ...sampler };
              delete debugSampler.compare;

              // Make a runner for these changed calls.
              const debugRunner = createTextureCallsRunner(
                t,
                {
                  format,
                  dimension: softwareTexture.descriptor.dimension ?? '2d',
                  sampleCount: softwareTexture.descriptor.sampleCount ?? 1,
                  depthOrArrayLayers: size.depthOrArrayLayers,
                },
                softwareTexture.viewDescriptor,
                textureType,
                debugSampler,
                debugCalls,
                stage
              );
              checkInfo = {
                runner: debugRunner,
                sampler: debugSampler,
                calls: debugCalls,
              };
              haveComparisonCheckInfo = true;
            }
          }

          if (!gpuTexels && gpuTexture) {
            // Read the texture back if we haven't yet. We'll use this
            // to get values for each sample point.
            gpuTexels = await readTextureToTexelViews(
              t,
              gpuTexture,
              softwareTexture.descriptor,
              getTexelViewFormatForTextureFormat(gpuTexture.format)
            );
          }

          const callForSamplePoints = checkInfo.calls[callIdx];

          // We're going to create textures with black and white texels
          // but if it's a compressed texture we use an encodable texture.
          // It's not perfect but we already know it failed. We're just hoping
          // to get sample points.
          const useTexelFormatForGPUTexture = isCompressedTextureFormat(
            softwareTexture.descriptor.format
          );

          if (useTexelFormatForGPUTexture) {
            errs.push(`
### WARNING: sample points are derived from un-compressed textures and may not match the
actual GPU results of sampling a compressed texture. The test itself failed at this point
(see expected: and got: above). We're only trying to determine what the GPU sampled, but
we cannot do that easily with compressed textures. ###
`);
          }

          const expectedSamplePoints = [
            'expected:',
            ...(await identifySamplePoints(
              softwareTexture,
              sampler,
              callForSamplePoints,
              call,
              softwareTexture.texels,
              (texels: TexelView[]) => {
                return Promise.resolve(
                  softwareTextureRead(
                    t,
                    stage,
                    callForSamplePoints,
                    {
                      texels,
                      descriptor: softwareTexture.descriptor,
                      viewDescriptor: softwareTexture.viewDescriptor,
                    },
                    checkInfo.sampler
                  )
                );
              }
            )),
          ];
          const gotSamplePoints = [
            'got:',
            ...(await identifySamplePoints(
              softwareTexture,
              sampler,
              callForSamplePoints,
              call,
              gpuTexels,
              async (texels: TexelView[]) => {
                const descriptor = { ...softwareTexture.descriptor };
                if (useTexelFormatForGPUTexture) {
                  descriptor.format = texels[0].format;
                }
                const gpuTexture = createTextureFromTexelViewsLocal(t, texels, descriptor);
                const result = (await checkInfo.runner.run(gpuTexture))[callIdx];
                gpuTexture.destroy();
                return result;
              }
            )),
          ];
          errs.push('  sample points:');
          errs.push(layoutTwoColumns(expectedSamplePoints, gotSamplePoints).join('\n'));
          errs.push('', '');
        }

        // This is not an `else` because it's common to comment out the previous `if` when running on the CQ.
        if (!t.rec.debugging) {
          errs.push('### turn on debugging to see sample points ###');
        }
      } // if (sampler)

      // Don't report the other errors. There are ~50 sample points per subcase and
      // 50-100 subcases, so the log would get enormous if they all fail. One
      // report per subcase is enough.
      break;
    } // if (bad)
  } // for callIdx

  results.runner.destroy();
  checkInfo.runner.destroy();

  return errs.length > 0 ? new Error(errs.join('\n')) : undefined;
}
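
// Minimal usage sketch (hypothetical: the surrounding test plumbing is assumed from
// the signature above, and `doTextureCalls`' own parameters are not shown in this excerpt):
//
//   const results = await doTextureCalls(/* ... runs the calls on the GPU ... */);
//   const res = await checkCallResults(
//     t, softwareTexture, textureType, sampler, calls, results, 'f' /* fragment */, gpuTexture);
//   t.expectOK(res); // checkCallResults resolves to an Error on mismatch, else undefined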