in src/sksl/codegen/SkSLRasterPipelineBuilder.cpp [1811:2478]
// Lowers this program's BuilderOp instruction list (`fInstructions`) into ProgramOp stages, which
// are appended to `pipeline`.
//
//   pipeline: receives the generated stages; existing entries are preserved.
//   alloc:    arena used for every stage context created here (via `alloc->make<>()`,
//             `SkRPCtxUtils::Pack`, and the `append...` helpers that take it).
//   uniforms: uniform values; its size is asserted to equal `fNumUniformSlots`.
//   slots:    backing storage. This function addresses `slots.values` (value slots),
//             `slots.stack` (temp stacks) and `slots.immutable` (immutable slots).
//
// Besides emitting stages, this function writes the immutable values directly into
// `slots.immutable`, and inserts stack-rewind stages before backwards branches and after long
// runs of stages.
void Program::makeStages(TArray<Stage>* pipeline,
SkArenaAlloc* alloc,
SkSpan<const float> uniforms,
const SlotData& slots) const {
    SkASSERT(fNumUniformSlots == SkToInt(uniforms.size()));
    // N is the high-precision raster pipeline stride. Every value-slot index and every temp-stack
    // depth below is multiplied by N to get a float offset.
    // NOTE(review): presumably N is the number of lanes processed per stage -- confirm.
    const int N = SkOpts::raster_pipeline_highp_stride;
    // Size of `pipeline` recorded at the most recent stack rewind; used by the periodic rewind
    // check at the bottom of the main loop.
    int mostRecentRewind = 0;
    // Assemble a map holding the current stack-top for each temporary stack. Position each temp
    // stack immediately after the previous temp stack; temp stacks are never allowed to overlap.
    int pos = 0;
    TArray<float*> tempStackMap;
    tempStackMap.resize(fTempStackMaxDepths.size());
    for (int idx = 0; idx < fTempStackMaxDepths.size(); ++idx) {
        tempStackMap[idx] = slots.stack.begin() + (pos * N);
        pos += fTempStackMaxDepths[idx];
    }
    // Track labels that we have reached in processing. An entry of -1 means the label has not
    // been encountered yet; otherwise it holds the index of the `label` instruction.
    TArray<int> labelToInstructionIndex;
    labelToInstructionIndex.push_back_n(fNumLabels, -1);
    // Index of the latest invoke_* instruction processed so far (0 if none has been seen).
    int mostRecentInvocationInstructionIdx = 0;
    auto EmitStackRewindForBackwardsBranch = [&](int labelID) {
        // If we have already encountered the label associated with this branch, this is a
        // backwards branch. Add a stack-rewind immediately before the branch to ensure that
        // long-running loops don't use an unbounded amount of stack space.
        int labelInstructionIdx = labelToInstructionIndex[labelID];
        if (labelInstructionIdx >= 0) {
            if (mostRecentInvocationInstructionIdx > labelInstructionIdx) {
                // The backwards-branch range includes an external invocation to another shader,
                // color filter, blender, or colorspace conversion. In this case, we always emit a
                // stack rewind, since the non-tailcall stages may exist on the stack.
                this->appendStackRewind(pipeline);
            } else {
                // The backwards-branch range only includes SkSL ops. If tailcalling is supported,
                // stack rewinding isn't needed. If the platform cannot tailcall, we need to rewind.
                this->appendStackRewindForNonTailcallers(pipeline);
            }
            // Restart the periodic-rewind countdown from the current pipeline size.
            mostRecentRewind = pipeline->size();
        }
    };
    // Several contexts store an SkRPOffset instead of a pointer. OffsetFromBase computes that
    // offset as a byte distance from the start of `slots.values`.
    // NOTE(review): temp-stack pointers (derived from `slots.stack`) are also passed through
    // OffsetFromBase, so this assumes the stack shares one allocation with `slots.values` --
    // confirm against the SlotData allocation code.
    auto* const basePtr = (std::byte*)slots.values.data();
    auto OffsetFromBase = [&](const void* ptr) -> SkRPOffset {
        return (SkRPOffset)((const std::byte*)ptr - basePtr);
    };
    // Copy all immutable values into the immutable slots. This happens here, up front, rather
    // than as a pipeline stage; `store_immutable_value` emits nothing in the main loop below.
    for (const Instruction& inst : fInstructions) {
        if (inst.fOp == BuilderOp::store_immutable_value) {
            slots.immutable[inst.fSlotA] = sk_bit_cast<float>(inst.fImmA);
        }
    }
    // Write each BuilderOp to the pipeline array.
    pipeline->reserve_exact(pipeline->size() + fInstructions.size());
    for (int instructionIdx = 0; instructionIdx < fInstructions.size(); ++instructionIdx) {
        const Instruction& inst = fInstructions[instructionIdx];
        // Address helpers for the current instruction. Immutable slots and uniforms are indexed
        // directly by slot number, while value slots are scaled by N.
        auto ImmutableA = [&]() { return &slots.immutable[1 * inst.fSlotA]; };
        auto ImmutableB = [&]() { return &slots.immutable[1 * inst.fSlotB]; };
        auto SlotA = [&]() { return &slots.values[N * inst.fSlotA]; };
        auto SlotB = [&]() { return &slots.values[N * inst.fSlotB]; };
        auto UniformA = [&]() { return &uniforms[inst.fSlotA]; };
        // Allocates a trace context of the pointee type of `ctx` and fills in the fields shared by
        // all trace ops: `traceMask` points at the top entry of the temp stack whose ID is in
        // immA, and `traceHook` is this program's trace hook.
        auto AllocTraceContext = [&](auto* ctx) {
            // We pass `ctx` solely for its type; the value is unused.
            using ContextType = typename std::remove_reference<decltype(*ctx)>::type;
            ctx = alloc->make<ContextType>();
            ctx->traceMask = reinterpret_cast<int*>(tempStackMap[inst.fImmA] - N);
            ctx->traceHook = fTraceHook.get();
            return ctx;
        };
        // Current top of the temp stack this instruction operates on. This is a reference into
        // `tempStackMap`, so the adjustment made after the switch is visible to later
        // instructions. Entries below the top are addressed as `tempStackPtr - (k * N)`.
        float*& tempStackPtr = tempStackMap[inst.fStackID];
        // NOTE(review): several cases below cast `inst.fOp` straight to ProgramOp; this relies on
        // BuilderOp and ProgramOp using the same numeric values for those ops -- confirm against
        // the enum definitions.
        switch (inst.fOp) {
            case BuilderOp::label: {
                // Record where the label was seen so that later branches to it can be recognized
                // as backwards branches.
                intptr_t labelID = inst.fImmA;
                SkASSERT(labelID >= 0 && labelID < fNumLabels);
                SkASSERT(labelToInstructionIndex[labelID] == -1);
                labelToInstructionIndex[labelID] = instructionIdx;
                pipeline->push_back({ProgramOp::label, context_bit_pun(labelID)});
                break;
            }
            case BuilderOp::jump:
            case BuilderOp::branch_if_any_lanes_active:
            case BuilderOp::branch_if_no_lanes_active: {
                // The context's `offset` is filled with the label ID (immA) here.
                // NOTE(review): presumably label IDs are resolved to real branch offsets in a
                // later pass outside this function -- confirm.
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);
                auto* ctx = alloc->make<SkRasterPipelineContexts::BranchCtx>();
                ctx->offset = inst.fImmA;
                pipeline->push_back({(ProgramOp)inst.fOp, ctx});
                break;
            }
            case BuilderOp::branch_if_all_lanes_active: {
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);
                auto* ctx = alloc->make<SkRasterPipelineContexts::BranchIfAllLanesActiveCtx>();
                ctx->offset = inst.fImmA;
                pipeline->push_back({ProgramOp::branch_if_all_lanes_active, ctx});
                break;
            }
            case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal: {
                // immA: label ID
                // immB: value to compare against
                // The comparison reads the top entry of the current temp stack.
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);
                auto* ctx = alloc->make<SkRasterPipelineContexts::BranchIfEqualCtx>();
                ctx->offset = inst.fImmA;
                ctx->value = inst.fImmB;
                ctx->ptr = reinterpret_cast<int*>(tempStackPtr - N);
                pipeline->push_back({ProgramOp::branch_if_no_active_lanes_eq, ctx});
                break;
            }
            case BuilderOp::init_lane_masks: {
                auto* ctx = alloc->make<SkRasterPipelineContexts::InitLaneMasksCtx>();
                pipeline->push_back({ProgramOp::init_lane_masks, ctx});
                break;
            }
            // The store_*/load_* ops below take the address of value slot A as their context.
            case BuilderOp::store_src_rg:
                pipeline->push_back({ProgramOp::store_src_rg, SlotA()});
                break;
            case BuilderOp::store_src:
                pipeline->push_back({ProgramOp::store_src, SlotA()});
                break;
            case BuilderOp::store_dst:
                pipeline->push_back({ProgramOp::store_dst, SlotA()});
                break;
            case BuilderOp::store_device_xy01:
                pipeline->push_back({ProgramOp::store_device_xy01, SlotA()});
                break;
            case BuilderOp::store_immutable_value:
                // The immutable slots were populated in an earlier pass.
                break;
            case BuilderOp::load_src:
                pipeline->push_back({ProgramOp::load_src, SlotA()});
                break;
            case BuilderOp::load_dst:
                pipeline->push_back({ProgramOp::load_dst, SlotA()});
                break;
            // Unary ops operate in place on the top immA entries of the current temp stack.
            case ALL_SINGLE_SLOT_UNARY_OP_CASES: {
                float* dst = tempStackPtr - (inst.fImmA * N);
                this->appendSingleSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_UNARY_OP_CASES: {
                float* dst = tempStackPtr - (inst.fImmA * N);
                this->appendMultiSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
                break;
            }
            case ALL_IMMEDIATE_BINARY_OP_CASES: {
                // The destination is the top immA stack entries when SlotA is NA; otherwise it is
                // value slot A. immB is forwarded to the helper as the immediate operand.
                float* dst = (inst.fSlotA == NA) ? tempStackPtr - (inst.fImmA * N)
                : SlotA();
                this->appendImmediateBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                OffsetFromBase(dst), inst.fImmB, inst.fImmA);
                break;
            }
            // Binary ops: `src` is the top immA stack entries and `dst` is the immA entries
            // directly beneath them.
            case ALL_N_WAY_BINARY_OP_CASES: {
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendAdjacentNWayBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                OffsetFromBase(dst), OffsetFromBase(src),
                inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_BINARY_OP_CASES: {
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendAdjacentMultiSlotBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                basePtr,
                OffsetFromBase(dst),
                OffsetFromBase(src),
                inst.fImmA);
                break;
            }
            // Ternary ops: three adjacent groups of immA stack entries. From the stack top
            // downwards they are `src1`, `src0`, then `dst`.
            case ALL_N_WAY_TERNARY_OP_CASES: {
                float* src1 = tempStackPtr - (inst.fImmA * N);
                float* src0 = tempStackPtr - (inst.fImmA * 2 * N);
                float* dst = tempStackPtr - (inst.fImmA * 3 * N);
                this->appendAdjacentNWayTernaryOp(pipeline, alloc, (ProgramOp)inst.fOp, basePtr,
                OffsetFromBase(dst),
                OffsetFromBase(src0),
                OffsetFromBase(src1),
                inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_TERNARY_OP_CASES: {
                float* src1 = tempStackPtr - (inst.fImmA * N);
                float* src0 = tempStackPtr - (inst.fImmA * 2 * N);
                float* dst = tempStackPtr - (inst.fImmA * 3 * N);
                this->appendAdjacentMultiSlotTernaryOp(pipeline, alloc,(ProgramOp)inst.fOp, basePtr,
                OffsetFromBase(dst),
                OffsetFromBase(src0),
                OffsetFromBase(src1),
                inst.fImmA);
                break;
            }
            case BuilderOp::select: {
                // Implemented as a masked copy of the top immA stack entries onto the immA entries
                // beneath them.
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendCopySlotsMasked(pipeline, alloc,
                OffsetFromBase(dst),
                OffsetFromBase(src),
                inst.fImmA);
                break;
            }
            // Slot-to-slot copies: slot A is the destination, slot B is the source, and immA is
            // the number of slots.
            case BuilderOp::copy_slot_masked:
                this->appendCopySlotsMasked(pipeline, alloc,
                OffsetFromBase(SlotA()),
                OffsetFromBase(SlotB()),
                inst.fImmA);
                break;
            case BuilderOp::copy_slot_unmasked:
                this->appendCopySlotsUnmasked(pipeline, alloc,
                OffsetFromBase(SlotA()),
                OffsetFromBase(SlotB()),
                inst.fImmA);
                break;
            case BuilderOp::copy_immutable_unmasked:
                this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,
                OffsetFromBase(SlotA()),
                OffsetFromBase(ImmutableB()),
                inst.fImmA);
                break;
            case BuilderOp::refract_4_floats: {
                // The op is given a pointer to the entry nine slots below the stack top.
                float* dst = tempStackPtr - (9 * N);
                pipeline->push_back({ProgramOp::refract_4_floats, dst});
                break;
            }
            case BuilderOp::inverse_mat2:
            case BuilderOp::inverse_mat3:
            case BuilderOp::inverse_mat4: {
                // Operates in place on the top immA stack entries.
                float* dst = tempStackPtr - (inst.fImmA * N);
                pipeline->push_back({(ProgramOp)inst.fOp, dst});
                break;
            }
            case BuilderOp::dot_2_floats:
            case BuilderOp::dot_3_floats:
            case BuilderOp::dot_4_floats: {
                // The op is given a pointer to the start of the top 2 * immA stack entries.
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                pipeline->push_back({(ProgramOp)inst.fOp, dst});
                break;
            }
            case BuilderOp::swizzle_1: {
                // A single-component swizzle just copies a slot and shrinks the stack; we can
                // slightly improve codegen by making that simplification here.
                // immA: number of stack entries being swizzled
                // immB: index of the selected component
                // Nothing is emitted when component 0 is selected, since it is already in place.
                int offset = inst.fImmB;
                SkASSERT(offset >= 0 && offset <= 15);
                float* dst = tempStackPtr - (inst.fImmA * N);
                float* src = dst + (offset * N);
                if (src != dst) {
                    this->appendCopySlotsUnmasked(pipeline, alloc,
                    OffsetFromBase(dst),
                    OffsetFromBase(src),
                    /*numSlots=*/1);
                }
                break;
            }
            case BuilderOp::swizzle_2:
            case BuilderOp::swizzle_3:
            case BuilderOp::swizzle_4: {
                // immA: number of stack entries being swizzled
                // immB: swizzle components, packed as nybbles
                SkRasterPipelineContexts::SwizzleCtx ctx;
                ctx.dst = OffsetFromBase(tempStackPtr - (N * inst.fImmA));
                // Unpack component nybbles into byte-offsets pointing at stack slots.
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx.offsets));
                pipeline->push_back({(ProgramOp)inst.fOp, SkRPCtxUtils::Pack(ctx, alloc)});
                break;
            }
            case BuilderOp::shuffle: {
                // immA: number of stack entries consumed
                // immB: number of stack entries generated
                // immC: components for offsets[0..7], packed as nybbles
                // immD: components for offsets[8..15], packed as nybbles
                int consumed = inst.fImmA;
                int generated = inst.fImmB;
                auto* ctx = alloc->make<SkRasterPipelineContexts::ShuffleCtx>();
                ctx->ptr = reinterpret_cast<int32_t*>(tempStackPtr) - (N * consumed);
                ctx->count = generated;
                // Unpack immC and immD from nybble form into the offset array.
                unpack_nybbles_to_offsets(inst.fImmC, SkSpan(&ctx->offsets[0], 8));
                unpack_nybbles_to_offsets(inst.fImmD, SkSpan(&ctx->offsets[8], 8));
                pipeline->push_back({ProgramOp::shuffle, ctx});
                break;
            }
            case BuilderOp::matrix_multiply_2:
            case BuilderOp::matrix_multiply_3:
            case BuilderOp::matrix_multiply_4: {
                // immA: left-matrix columns
                // immB: left-matrix rows
                // immC: right-matrix columns
                // immD: right-matrix rows
                // `consumed` is the total number of stack entries occupied by the result and both
                // input matrices; the context's `dst` points at the lowest of them.
                int consumed = (inst.fImmB * inst.fImmC) + // result
                (inst.fImmA * inst.fImmB) + // left-matrix
                (inst.fImmC * inst.fImmD); // right-matrix
                SkRasterPipelineContexts::MatrixMultiplyCtx ctx;
                ctx.dst = OffsetFromBase(tempStackPtr - (N * consumed));
                ctx.leftColumns = inst.fImmA;
                ctx.leftRows = inst.fImmB;
                ctx.rightColumns = inst.fImmC;
                ctx.rightRows = inst.fImmD;
                pipeline->push_back({(ProgramOp)inst.fOp, SkRPCtxUtils::Pack(ctx, alloc)});
                break;
            }
            case BuilderOp::exchange_src: {
                // Operates on the top four stack entries.
                float* dst = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::exchange_src, dst});
                break;
            }
            // push_* ops write starting at the current stack top and pop_* ops read from just
            // below it. The stack pointer itself is only moved after the switch, using
            // stack_usage(inst).
            case BuilderOp::push_src_rgba: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_src, dst});
                break;
            }
            case BuilderOp::push_dst_rgba: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_dst, dst});
                break;
            }
            case BuilderOp::push_device_xy01: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_device_xy01, dst});
                break;
            }
            case BuilderOp::pop_src_rgba: {
                float* src = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::load_src, src});
                break;
            }
            case BuilderOp::pop_dst_rgba: {
                float* src = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::load_dst, src});
                break;
            }
            case BuilderOp::push_slots: {
                // Copies immA value slots, starting at slot A, onto the stack top.
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc,
                OffsetFromBase(dst),
                OffsetFromBase(SlotA()),
                inst.fImmA);
                break;
            }
            case BuilderOp::push_immutable: {
                // Copies immA immutable slots, starting at slot A, onto the stack top.
                float* dst = tempStackPtr;
                this->appendCopyImmutableUnmasked(pipeline, alloc, basePtr,
                OffsetFromBase(dst),
                OffsetFromBase(ImmutableA()),
                inst.fImmA);
                break;
            }
            case BuilderOp::copy_stack_to_slots_indirect:
            case BuilderOp::push_immutable_indirect:
            case BuilderOp::push_slots_indirect:
            case BuilderOp::push_uniform_indirect: {
                // SlotA: fixed-range start
                // SlotB: limit-range end
                // immA: number of slots to copy
                // immB: dynamic stack ID
                // The dynamic offset is read from the top entry of the stack named by immB.
                // `indirectLimit` is the largest offset that keeps an immA-slot copy inside the
                // range [SlotA, SlotB).
                ProgramOp op;
                auto* ctx = alloc->make<SkRasterPipelineContexts::CopyIndirectCtx>();
                ctx->indirectOffset =
                reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmB]) - (1 * N);
                ctx->indirectLimit = inst.fSlotB - inst.fSlotA - inst.fImmA;
                ctx->slots = inst.fImmA;
                // The three push variants copy to the stack top from value slots, immutable slots
                // or uniforms respectively. The remaining op (copy_stack_to_slots_indirect) copies
                // the top immA stack entries into value slots.
                if (inst.fOp == BuilderOp::push_slots_indirect) {
                    op = ProgramOp::copy_from_indirect_unmasked;
                    ctx->src = reinterpret_cast<const int32_t*>(SlotA());
                    ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
                } else if (inst.fOp == BuilderOp::push_immutable_indirect) {
                    // We reuse the indirect-uniform op for indirect copies of immutable data.
                    op = ProgramOp::copy_from_indirect_uniform_unmasked;
                    ctx->src = reinterpret_cast<const int32_t*>(ImmutableA());
                    ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
                } else if (inst.fOp == BuilderOp::push_uniform_indirect) {
                    op = ProgramOp::copy_from_indirect_uniform_unmasked;
                    ctx->src = reinterpret_cast<const int32_t*>(UniformA());
                    ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
                } else {
                    op = ProgramOp::copy_to_indirect_masked;
                    ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (ctx->slots * N);
                    ctx->dst = reinterpret_cast<int32_t*>(SlotA());
                }
                pipeline->push_back({op, ctx});
                break;
            }
            case BuilderOp::push_uniform:
            case BuilderOp::copy_uniform_to_slots_unmasked: {
                // SlotA: first uniform to read
                // SlotB: destination value slot (copy_uniform_to_slots_unmasked only)
                // immA: number of uniforms to copy
                // Emits one stage per group of up to four uniforms; only the final stage can
                // cover fewer than four.
                const float* src = UniformA();
                float* dst = (inst.fOp == BuilderOp::push_uniform) ? tempStackPtr : SlotB();
                for (int remaining = inst.fImmA; remaining > 0; remaining -= 4) {
                    auto ctx = alloc->make<SkRasterPipelineContexts::UniformCtx>();
                    ctx->dst = reinterpret_cast<int32_t*>(dst);
                    ctx->src = reinterpret_cast<const int32_t*>(src);
                    switch (remaining) {
                        case 1: pipeline->push_back({ProgramOp::copy_uniform, ctx}); break;
                        case 2: pipeline->push_back({ProgramOp::copy_2_uniforms, ctx}); break;
                        case 3: pipeline->push_back({ProgramOp::copy_3_uniforms, ctx}); break;
                        default: pipeline->push_back({ProgramOp::copy_4_uniforms, ctx}); break;
                    }
                    // The destination advances by four N-wide slots; the source advances by four
                    // floats, because uniforms are not scaled by N.
                    dst += 4 * N;
                    src += 4;
                }
                break;
            }
            // Mask push/pop ops: a push stores the mask at the stack top, and a pop loads it from
            // the entry just below the top.
            case BuilderOp::push_condition_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_condition_mask, dst});
                break;
            }
            case BuilderOp::pop_condition_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_condition_mask, src});
                break;
            }
            case BuilderOp::merge_condition_mask:
            case BuilderOp::merge_inv_condition_mask: {
                // The op is given a pointer to the start of the top two stack entries.
                float* ptr = tempStackPtr - (2 * N);
                pipeline->push_back({(ProgramOp)inst.fOp, ptr});
                break;
            }
            case BuilderOp::push_loop_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_loop_mask, dst});
                break;
            }
            case BuilderOp::pop_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_loop_mask, src});
                break;
            }
            case BuilderOp::pop_and_reenable_loop_mask: {
                // Same ProgramOp as reenable_loop_mask below, but reading from the stack top
                // instead of from a value slot.
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::reenable_loop_mask, src});
                break;
            }
            case BuilderOp::reenable_loop_mask:
                pipeline->push_back({ProgramOp::reenable_loop_mask, SlotA()});
                break;
            case BuilderOp::mask_off_loop_mask:
                pipeline->push_back({ProgramOp::mask_off_loop_mask, nullptr});
                break;
            case BuilderOp::merge_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::merge_loop_mask, src});
                break;
            }
            case BuilderOp::push_return_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_return_mask, dst});
                break;
            }
            case BuilderOp::pop_return_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_return_mask, src});
                break;
            }
            case BuilderOp::mask_off_return_mask:
                pipeline->push_back({ProgramOp::mask_off_return_mask, nullptr});
                break;
            case BuilderOp::copy_constant:
            case BuilderOp::push_constant: {
                // immA: number of slots to fill
                // immB: the constant value
                // copy_constant writes to value slot A, while push_constant writes to the stack
                // top. Emits one stage per group of up to four slots.
                float* dst = (inst.fOp == BuilderOp::copy_constant) ? SlotA() : tempStackPtr;
                // Splat constant values onto the stack.
                for (int remaining = inst.fImmA; remaining > 0; remaining -= 4) {
                    SkRasterPipelineContexts::ConstantCtx ctx;
                    ctx.dst = OffsetFromBase(dst);
                    ctx.value = inst.fImmB;
                    void* ptr = SkRPCtxUtils::Pack(ctx, alloc);
                    switch (remaining) {
                        case 1: pipeline->push_back({ProgramOp::copy_constant, ptr}); break;
                        case 2: pipeline->push_back({ProgramOp::splat_2_constants, ptr}); break;
                        case 3: pipeline->push_back({ProgramOp::splat_3_constants, ptr}); break;
                        default: pipeline->push_back({ProgramOp::splat_4_constants, ptr}); break;
                    }
                    dst += 4 * N;
                }
                break;
            }
            // copy_stack_to_slots[_unmasked]:
            // SlotA: destination value slot
            // immA: number of slots to copy
            // immB: offset from stack top of the first source entry
            case BuilderOp::copy_stack_to_slots: {
                float* src = tempStackPtr - (inst.fImmB * N);
                this->appendCopySlotsMasked(pipeline, alloc,
                OffsetFromBase(SlotA()),
                OffsetFromBase(src),
                inst.fImmA);
                break;
            }
            case BuilderOp::copy_stack_to_slots_unmasked: {
                float* src = tempStackPtr - (inst.fImmB * N);
                this->appendCopySlotsUnmasked(pipeline, alloc,
                OffsetFromBase(SlotA()),
                OffsetFromBase(src),
                inst.fImmA);
                break;
            }
            case BuilderOp::swizzle_copy_stack_to_slots: {
                // SlotA: fixed-range start
                // immA: number of swizzle components
                // immB: swizzle components
                // immC: offset from stack top
                // The stage is chosen by offsetting from swizzle_copy_slot_masked by (immA - 1).
                // NOTE(review): this assumes the 1- to 4-component swizzle-copy ops are
                // consecutive in ProgramOp -- confirm against the enum.
                auto stage = (ProgramOp)((int)ProgramOp::swizzle_copy_slot_masked + inst.fImmA - 1);
                auto* ctx = alloc->make<SkRasterPipelineContexts::SwizzleCopyCtx>();
                ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (inst.fImmC * N);
                ctx->dst = reinterpret_cast<int32_t*>(SlotA());
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));
                pipeline->push_back({stage, ctx});
                break;
            }
            case BuilderOp::push_clone: {
                // immA: number of slots
                // immB: offset from stack top of the first source entry
                float* src = tempStackPtr - (inst.fImmB * N);
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc,
                OffsetFromBase(dst),
                OffsetFromBase(src),
                inst.fImmA);
                break;
            }
            case BuilderOp::push_clone_from_stack: {
                // immA: number of slots
                // immB: other stack ID
                // immC: offset from stack top
                float* sourceStackPtr = tempStackMap[inst.fImmB];
                float* src = sourceStackPtr - (inst.fImmC * N);
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc,
                OffsetFromBase(dst),
                OffsetFromBase(src),
                inst.fImmA);
                break;
            }
            case BuilderOp::push_clone_indirect_from_stack: {
                // immA: number of slots
                // immB: other stack ID
                // immC: offset from stack top
                // immD: dynamic stack ID
                // `indirectLimit` is the largest dynamic offset that keeps an immA-slot read
                // inside the immC entries below the source stack's top.
                float* sourceStackPtr = tempStackMap[inst.fImmB];
                auto* ctx = alloc->make<SkRasterPipelineContexts::CopyIndirectCtx>();
                ctx->dst = reinterpret_cast<int32_t*>(tempStackPtr);
                ctx->src = reinterpret_cast<const int32_t*>(sourceStackPtr) - (inst.fImmC * N);
                ctx->indirectOffset =
                reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmD]) - (1 * N);
                ctx->indirectLimit = inst.fImmC - inst.fImmA;
                ctx->slots = inst.fImmA;
                pipeline->push_back({ProgramOp::copy_from_indirect_unmasked, ctx});
                break;
            }
            case BuilderOp::swizzle_copy_stack_to_slots_indirect: {
                // SlotA: fixed-range start
                // SlotB: limit-range end
                // immA: number of swizzle components
                // immB: swizzle components
                // immC: offset from stack top
                // immD: dynamic stack ID
                // The limit is based on the highest component index used by the swizzle (plus
                // one), rather than on the component count.
                auto* ctx = alloc->make<SkRasterPipelineContexts::SwizzleCopyIndirectCtx>();
                ctx->src = reinterpret_cast<const int32_t*>(tempStackPtr) - (inst.fImmC * N);
                ctx->dst = reinterpret_cast<int32_t*>(SlotA());
                ctx->indirectOffset =
                reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmD]) - (1 * N);
                ctx->indirectLimit =
                inst.fSlotB - inst.fSlotA - (max_packed_nybble(inst.fImmB, inst.fImmA) + 1);
                ctx->slots = inst.fImmA;
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));
                pipeline->push_back({ProgramOp::swizzle_copy_to_indirect_masked, ctx});
                break;
            }
            case BuilderOp::case_op: {
                // immA: expected value for this case
                // The context's offset addresses the start of the top two stack entries.
                SkRasterPipelineContexts::CaseOpCtx ctx;
                ctx.expectedValue = inst.fImmA;
                ctx.offset = OffsetFromBase(tempStackPtr - (2 * N));
                pipeline->push_back({ProgramOp::case_op, SkRPCtxUtils::Pack(ctx, alloc)});
                break;
            }
            case BuilderOp::continue_op:
                // immA: ID of the stack whose top entry is handed to the op.
                pipeline->push_back({ProgramOp::continue_op, tempStackMap[inst.fImmA] - (1 * N)});
                break;
            case BuilderOp::pad_stack:
            case BuilderOp::discard_stack:
                // No stage is emitted. These ops only move the stack pointer, through the
                // stack_usage() adjustment after the switch.
                break;
            case BuilderOp::invoke_shader:
            case BuilderOp::invoke_color_filter:
            case BuilderOp::invoke_blender:
                // immA is bit-punned into the stage context. The instruction index is recorded so
                // that a later backwards branch spanning this invocation emits an unconditional
                // stack rewind.
                pipeline->push_back({(ProgramOp)inst.fOp, context_bit_pun(inst.fImmA)});
                mostRecentInvocationInstructionIdx = instructionIdx;
                break;
            case BuilderOp::invoke_to_linear_srgb:
            case BuilderOp::invoke_from_linear_srgb:
                // immA: ID of the stack whose top four entries are handed to the op.
                pipeline->push_back({(ProgramOp)inst.fOp, tempStackMap[inst.fImmA] - (4 * N)});
                mostRecentInvocationInstructionIdx = instructionIdx;
                break;
            // Trace ops: in all of them, immA is the trace-mask stack ID consumed by
            // AllocTraceContext, and immB carries the op-specific payload.
            case BuilderOp::trace_line: {
                auto* ctx = AllocTraceContext((SkRasterPipelineContexts::TraceLineCtx*)nullptr);
                ctx->lineNumber = inst.fImmB;
                pipeline->push_back({ProgramOp::trace_line, ctx});
                break;
            }
            case BuilderOp::trace_scope: {
                auto* ctx = AllocTraceContext((SkRasterPipelineContexts::TraceScopeCtx*)nullptr);
                ctx->delta = inst.fImmB;
                pipeline->push_back({ProgramOp::trace_scope, ctx});
                break;
            }
            case BuilderOp::trace_enter:
            case BuilderOp::trace_exit: {
                auto* ctx = AllocTraceContext((SkRasterPipelineContexts::TraceFuncCtx*)nullptr);
                ctx->funcIdx = inst.fImmB;
                pipeline->push_back({(ProgramOp)inst.fOp, ctx});
                break;
            }
            case BuilderOp::trace_var:
            case BuilderOp::trace_var_indirect: {
                // SlotA: fixed-range start
                // SlotB: limit-range end
                // immA: trace-mask stack ID
                // immB: number of slots
                // immC: dynamic stack ID
                // Both ops emit ProgramOp::trace_var. The non-indirect form leaves the indirect
                // offset null and the limit at zero.
                auto* ctx = AllocTraceContext((SkRasterPipelineContexts::TraceVarCtx*)nullptr);
                ctx->slotIdx = inst.fSlotA;
                ctx->numSlots = inst.fImmB;
                ctx->data = reinterpret_cast<int*>(SlotA());
                if (inst.fOp == BuilderOp::trace_var_indirect) {
                    ctx->indirectOffset =
                    reinterpret_cast<const uint32_t*>(tempStackMap[inst.fImmC]) - (1 * N);
                    ctx->indirectLimit = inst.fSlotB - inst.fSlotA - inst.fImmB;
                } else {
                    ctx->indirectOffset = nullptr;
                    ctx->indirectLimit = 0;
                }
                pipeline->push_back({ProgramOp::trace_var, ctx});
                break;
            }
            default:
                SkDEBUGFAILF("Raster Pipeline: unsupported instruction %d", (int)inst.fOp);
                break;
        }
        // Apply this instruction's net effect on its temp stack. Because `tempStackPtr` is a
        // reference into `tempStackMap`, this updates the stack top seen by later instructions.
        // The asserts check that the pointer stays within the bounds of `slots.stack`.
        int stackUsage = stack_usage(inst);
        if (stackUsage != 0) {
            tempStackPtr += stackUsage * N;
            SkASSERT(tempStackPtr >= slots.stack.begin());
            SkASSERT(tempStackPtr <= slots.stack.end());
        }
        // Periodically rewind the stack, whenever more than 500 pipeline stages have been appended
        // since the last rewind. When SK_HAS_MUSTTAIL is set, rewinds are not actually used; the
        // appendStackRewind call becomes a no-op. On platforms that don't support SK_HAS_MUSTTAIL,
        // rewinding the stack periodically can prevent a potential stack overflow when running a
        // long program.
        int numPipelineStages = pipeline->size();
        if (numPipelineStages - mostRecentRewind > 500) {
            this->appendStackRewindForNonTailcallers(pipeline);
            mostRecentRewind = numPipelineStages;
        }
    }
}