export function runDerivativeTest()

in src/webgpu/shader/execution/expression/call/builtin/derivatives.ts [16:234]


export function runDerivativeTest(
  t: GPUTest,
  cases: Case[],
  builtin: string,
  non_uniform_discard: boolean,
  vectorize?: number
) {
  // If the 'vectorize' config option was provided, pack the cases into vectors.
  let type: Type = Type.f32;
  if (vectorize !== undefined) {
    const packed = packScalarsToVector([type, type], type, cases, vectorize);
    cases = packed.cases;
    type = packed.resultType;
  }

  ////////////////////////////////////////////////////////////////
  // The two input values for a given case are distributed to two different invocations in a quad.
  // We will populate a uniform buffer with these input values laid out sequentially:
  // [ case_0_input_1, case_0_input_0, case_1_input_1, case_1_input_0, ...]
  //
  // The render pipeline will be launched once per pair of cases over a viewport
  // size of (2, 2). Each 2x2 set of calls will will exercise two test cases.
  // Each of these draw calls will use a different instance index, which is
  // forwarded to the fragment shader. Each invocation returns the result which
  // is stored in a rgba32uint texture.
  //
  // Consider draw calls that test 4 cases (c_0, c_1, c_2, c_3).
  //
  // For derivatives along the 'x' direction, the mapping from fragment position to case input is:
  // Quad 0: | c_0_i_1 | c_0_i_0 |     Quad 1: | c_2_i_1 | c_2_i_0 |
  //         | c_1_i_1 | c_1_i_0 |             | c_3_i_1 | c_3_i_0 |
  //
  // For derivatives along the 'y' direction, the mapping from fragment position to case input is:
  // Quad 0: | c_0_i_1 | c_1_i_1 |     Quad 1: | c_2_i_1 | c_3_i_1 |
  //         | c_0_i_0 | c_1_i_0 |             | c_2_i_0 | c_3_i_0 |
  //
  ////////////////////////////////////////////////////////////////

  // Determine the direction of the derivative ('x' or 'y') from the builtin name.
  const dir = builtin[3];

  // Determine the WGSL type to use in the shader, and the stride in bytes between values.
  const valueStride = 16;
  let conversionFromInput = 'input.x';
  let conversionToOutput = `vec4f(v)`;
  if (vectorize) {
    switch (vectorize) {
      case 2:
        conversionFromInput = 'input.xy';
        conversionToOutput = 'vec4f(v, 0, 0)';
        break;
      case 3:
        conversionFromInput = 'input.xyz';
        conversionToOutput = 'vec4f(v, 0)';
        break;
      case 4:
        conversionFromInput = 'input';
        conversionToOutput = 'v';
        break;
    }
  }

  // Define a vertex shader that draws a triangle over the full viewport, and a fragment shader that
  // calls the derivative builtin with a value loaded from that fragment's index into the storage
  // buffer (determined using the quad index and fragment position, as described above).
  const code = `
struct CaseInfo {
  @builtin(position) position: vec4f,
  @location(0) @interpolate(flat, either) quad_idx: u32,
}

@vertex
fn vert(@builtin(vertex_index) vertex_idx: u32,
        @builtin(instance_index) instance_idx: u32) -> CaseInfo {
  const kVertices = array(
    vec2f(-2, -2),
    vec2f( 2, -2),
    vec2f( 0,  2),
  );
  return CaseInfo(vec4(kVertices[vertex_idx], 0, 1), instance_idx);
}

@group(0) @binding(0) var<uniform> inputs : array<vec4f, ${cases.length * 2}>;

@fragment
fn frag(info : CaseInfo) -> @location(0) vec4u {
  let case_idx = u32(info.position.${dir === 'x' ? 'y' : 'x'});
  let inv_idx = u32(info.position.${dir});
  let index = info.quad_idx*4 + case_idx*2 + inv_idx;
  let input = inputs[index];
  ${non_uniform_discard ? 'if inv_idx == 0 { discard; }' : ''}
  let v = ${builtin}(${conversionFromInput});
  return bitcast<vec4u>(${conversionToOutput});
}
`;

  // Create the render pipeline.
  const module = t.device.createShaderModule({ code });
  const pipeline = t.device.createRenderPipeline({
    layout: 'auto',
    vertex: { module },
    fragment: { module, targets: [{ format: 'rgba32uint' }] },
  });

  // Create storage buffers to hold the inputs and outputs.
  const bufferSize = cases.length * 2 * valueStride;
  const inputBuffer = t.createBufferTracked({
    size: bufferSize,
    usage: GPUBufferUsage.UNIFORM,
    mappedAtCreation: true,
  });

  // Populate the input uniform buffer with case input values.
  const valuesData = new Uint8Array(inputBuffer.getMappedRange());
  for (let i = 0; i < cases.length; i++) {
    const inputs = cases[i].input as ReadonlyArray<Value>;
    inputs[0].copyTo(valuesData, (i * 2 + 1) * valueStride);
    inputs[1].copyTo(valuesData, i * 2 * valueStride);
  }
  inputBuffer.unmap();

  // Create a bind group for the storage buffers.
  const group = t.device.createBindGroup({
    entries: [{ binding: 0, resource: { buffer: inputBuffer } }],
    layout: pipeline.getBindGroupLayout(0),
  });

  const colorAttachment = t.createTextureTracked({
    size: { width: 2, height: 2 },
    format: 'rgba32uint',
    usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
  });
  const bytesPerRow = align(valueStride * colorAttachment.width, 256);

  // Submit the render pass to the device.
  const results = [];
  const encoder = t.device.createCommandEncoder({ label: 'runDerivativeTest' });
  for (let quad = 0; quad < cases.length / 2; quad++) {
    const pass = encoder.beginRenderPass({
      colorAttachments: [
        {
          view: colorAttachment.createView(),
          loadOp: 'clear',
          storeOp: 'store',
        },
      ],
    });
    pass.setPipeline(pipeline);
    pass.setBindGroup(0, group);
    pass.draw(3, 1, 0, quad);
    pass.end();
    const outputBuffer = t.createBufferTracked({
      size: bytesPerRow * colorAttachment.height,
      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
    });
    results.push(outputBuffer);
    encoder.copyTextureToBuffer(
      { texture: colorAttachment },
      { buffer: outputBuffer, bytesPerRow },
      [colorAttachment.width, colorAttachment.height]
    );
  }

  t.queue.submit([encoder.finish()]);

  // Check the outputs match the expected results.
  results.forEach((outputBuffer, quadNdx) => {
    t.expectGPUBufferValuesPassCheck(
      outputBuffer,
      (outputData: Uint8Array) => {
        for (let i = 0; i < 4; ++i) {
          const tx = i % 2;
          const ty = (i / 2) | 0;
          const [inputNdx, caseNdx] = dir === 'x' ? [tx, ty] : [ty, tx];
          const caseNdxAlt = 1 - caseNdx;
          const c = cases[quadNdx * 2 + caseNdx];

          // Both invocations involved in the derivative should get the same result.
          if (non_uniform_discard && inputNdx === 0) {
            continue;
          }

          const index = ty * bytesPerRow + tx * valueStride;
          const result = type.read(outputData, index);
          const cmp = toComparator(c.expected).compare(result);
          if (!cmp.matched) {
            // If this is a coarse derivative, the implementation is also allowed to calculate only
            // one of the two derivatives and return that result to all of the invocations.
            if (!builtin.endsWith('Fine')) {
              const c0 = cases[quadNdx * 2 + caseNdxAlt];
              const cmp0 = toComparator(c0.expected).compare(result);
              if (!cmp0.matched) {
                return new Error(`
  1st pair: (${(c.input as Value[]).join(', ')})
  expected: ${cmp.expected}

  2nd pair: (${(c0.input as Value[]).join(', ')})
  expected: ${cmp0.expected}

  returned: ${result}`);
              }
            } else {
              return new Error(`
    inputs: (${(c.input as Value[]).join(', ')})
  expected: ${cmp.expected}

  returned: ${result}`);
            }
          }
        }
        return undefined;
      },
      {
        type: Uint8Array,
        typedLength: outputBuffer.size,
      }
    );
  });
}