in renderdoc/driver/shaders/spirv/spirv_debug.cpp [635:4002]
void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState> &workgroup)
{
m_State = state;
Iter it = debugger.GetIterForInstruction(nextInstruction);
nextInstruction++;
OpDecoder opdata(it);
// don't skip any instructions here. These should be skipped *after* processing, so that
// nextInstruction always points to the next real instruction.
switch(opdata.op)
{
//////////////////////////////////////////////////////////////////////////////
//
// Pointer manipulation opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::Load:
{
OpLoad load(it);
// ignore
(void)load.memoryAccess;
// get the pointer value, evaluate it (i.e. dereference) and store the result
SetDst(load.result, ReadPointerValue(load.pointer));
break;
}
case Op::Store:
{
OpStore store(it);
// ignore
(void)store.memoryAccess;
WritePointerValue(store.pointer, GetSrc(store.object));
break;
}
case Op::CopyMemory:
{
OpCopyMemory copy(it);
// ignore
(void)copy.memoryAccess0;
(void)copy.memoryAccess1;
WritePointerValue(copy.target, ReadPointerValue(copy.source));
break;
}
case Op::AccessChain:
case Op::InBoundsAccessChain:
{
OpAccessChain chain(it);
rdcarray<uint32_t> indices;
// evaluate the indices
indices.reserve(chain.indexes.size());
for(Id id : chain.indexes)
indices.push_back(uintComp(GetSrc(id), 0));
SetDst(chain.result, debugger.MakeCompositePointer(
ids[chain.base], debugger.GetPointerBaseId(ids[chain.base]), indices));
break;
}
case Op::PtrAccessChain:
case Op::InBoundsPtrAccessChain:
{
OpPtrAccessChain chain(it);
rdcarray<uint32_t> indices;
// evaluate the indices
indices.reserve(chain.indexes.size());
for(Id id : chain.indexes)
indices.push_back(uintComp(GetSrc(id), 0));
ShaderVariable base = ids[chain.base];
PointerVal val = base.GetPointer();
int32_t element = intComp(GetSrc(chain.element), 0);
// adjust the address by the element. We should have the array stride since the base pointer
// must point into an array and we can't go outside it.
base.SetTypedPointer(val.pointer + element * debugger.GetPointerArrayStride(base), val.shader,
val.pointerTypeID);
SetDst(chain.result,
debugger.MakeCompositePointer(base, debugger.GetPointerBaseId(base), indices));
break;
}
case Op::ArrayLength:
{
OpArrayLength len(it);
ShaderVariable structPointer = GetSrc(len.structure);
// "Structure must be a logical pointer..." which is opaque in RD terminology
RDCASSERT(debugger.IsOpaquePointer(structPointer));
// get the pointer base offset (should be zero for any binding but could be non-zero for a
// buffer_device_address pointer)
uint64_t offset = debugger.GetPointerByteOffset(structPointer);
// add the offset of the member
const DataType &pointerType = debugger.GetTypeForId(len.structure);
const DataType &structType = debugger.GetType(pointerType.InnerType());
offset += structType.children[len.arraymember].decorations.offset;
ShaderVariable result;
result.rows = result.columns = 1;
ShaderBindIndex bind = debugger.GetPointerValue(structPointer).GetBindIndex();
uint64_t byteLen = debugger.GetAPIWrapper()->GetBufferLength(bind) - offset;
const Decorations &dec = debugger.GetDecorations(structType.children[len.arraymember].type);
RDCASSERT(dec.flags & Decorations::HasArrayStride);
byteLen /= dec.arrayStride;
// Result Type must be an OpTypeInt with 32-bit Width and 0 Signedness
result.type = VarType::UInt;
setUintComp(result, 0, uint32_t(byteLen));
SetDst(len.result, result);
break;
}
case Op::PtrEqual:
case Op::PtrNotEqual:
{
OpPtrEqual equal(it);
ShaderVariable a = GetSrc(equal.operand1);
ShaderVariable b = GetSrc(equal.operand2);
bool isEqual = debugger.ArePointersAndEqual(a, b);
ShaderVariable var;
var.rows = var.columns = 1;
var.type = VarType::Bool;
if(opdata.op == Op::PtrEqual)
setUintComp(var, 0, isEqual ? 1 : 0);
else
setUintComp(var, 0, isEqual ? 0 : 1);
SetDst(equal.result, var);
break;
}
// physical storage pointers
case Op::ConvertPtrToU:
{
OpConvertPtrToU convert(it);
ShaderVariable ptr = GetSrc(convert.pointer);
const DataType &resultType = debugger.GetType(convert.resultType);
ptr.type = resultType.scalar().Type();
SetDst(convert.result, ptr);
break;
}
case Op::ConvertUToPtr:
{
OpConvertUToPtr convert(it);
ShaderVariable ptr = GetSrc(convert.integerValue);
const DataType &type = debugger.GetType(convert.resultType);
SetDst(convert.result, debugger.MakeTypedPointer(ptr.value.u64v[0], type));
break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Derivative opcodes
//
//////////////////////////////////////////////////////////////////////////////
// spec allows the implementation to choose what DPdx means (coarse or fine), so we choose
// coarse which seems a reasonable default. In future we could driver-detect the selection in
// use (assuming it's not dynamic based on circumstances)
case Op::DPdx:
case Op::DPdy:
case Op::DPdxCoarse:
case Op::DPdyCoarse:
case Op::DPdxFine:
case Op::DPdyFine:
{
// these all share a format
OpDPdx deriv(it);
DerivDir dir = DDX;
if(opdata.op == Op::DPdy || opdata.op == Op::DPdyCoarse || opdata.op == Op::DPdyFine)
dir = DDY;
DerivType type = Coarse;
if(opdata.op == Op::DPdxFine || opdata.op == Op::DPdyFine)
type = Fine;
SetDst(deriv.result, CalcDeriv(dir, type, workgroup, deriv.p));
break;
}
case Op::Fwidth:
case Op::FwidthCoarse:
case Op::FwidthFine:
{
// these all share a format
OpFwidth deriv(it);
DerivType type = Coarse;
if(opdata.op == Op::FwidthFine)
type = Fine;
ShaderVariable var = CalcDeriv(DDX, type, workgroup, deriv.p);
ShaderVariable ddy = CalcDeriv(DDY, type, workgroup, deriv.p);
for(uint32_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(T) comp<T>(var, c) = fabs(comp<T>(var, c)) + fabs(comp<T>(ddy, c))
IMPL_FOR_FLOAT_TYPES(_IMPL);
}
SetDst(deriv.result, var);
break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Composite/vector opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::CompositeExtract:
{
OpCompositeExtract extract(it);
// to re-use composite/access chain logic, temporarily make a pointer to the composite
// (illegal in SPIR-V)
ShaderVariable ptr =
debugger.MakeCompositePointer(ids[extract.composite], extract.composite, extract.indexes);
// then evaluate it, to get the extracted value
SetDst(extract.result, debugger.ReadFromPointer(ptr));
break;
}
case Op::CompositeInsert:
{
OpCompositeInsert insert(it);
ShaderVariable var = GetSrc(insert.composite);
ShaderVariable obj = GetSrc(insert.object);
// walk any struct member indices
ShaderVariable *mod = &var;
size_t i = 0;
while(i < insert.indexes.size() && !mod->members.empty())
{
mod = &mod->members[insert.indexes[i]];
i++;
}
if(i == insert.indexes.size())
{
// if there are no more indices, replace the object here
mod->value = obj.value;
}
else if(i + 1 == insert.indexes.size())
{
// one more index
uint32_t idx = insert.indexes[i];
// if it's a matrix, replace a whole (column) vector
if(mod->rows > 1)
{
uint32_t column = idx;
RDCASSERTEQUAL(mod->rows, obj.columns);
for(uint32_t row = 0; row < mod->rows; row++)
copyComp(*mod, row * mod->columns + column, obj, row);
}
else
{
// if it's a vector, replace one scalar
copyComp(*mod, idx, obj, 0);
}
}
else if(i + 2 == insert.indexes.size())
{
// two more indices, selecting column then scalar in a matrix
uint32_t column = insert.indexes[i];
uint32_t row = insert.indexes[i + 1];
copyComp(*mod, row * mod->columns + column, obj, 0);
}
// then evaluate it, to get the extracted value
SetDst(insert.result, var);
break;
}
case Op::CompositeConstruct:
{
OpCompositeConstruct construct(it);
ShaderVariable var;
const DataType &type = debugger.GetType(construct.resultType);
RDCASSERT(!construct.constituents.empty());
if(type.type == DataType::ArrayType)
{
var.members.resize(construct.constituents.size());
for(size_t i = 0; i < construct.constituents.size(); i++)
{
var.members[i] = GetSrc(construct.constituents[i]);
var.members[i].name = StringFormat::Fmt("[%zu]", i);
}
}
else if(type.type == DataType::StructType)
{
RDCASSERTEQUAL(type.children.size(), construct.constituents.size());
var.members.resize(construct.constituents.size());
for(size_t i = 0; i < construct.constituents.size(); i++)
{
ShaderVariable &mem = var.members[i];
mem = GetSrc(construct.constituents[i]);
if(!type.children[i].name.empty())
mem.name = type.children[i].name;
else
mem.name = StringFormat::Fmt("_child%zu", i);
}
}
else if(type.type == DataType::VectorType)
{
RDCASSERT(construct.constituents.size() <= 4);
var.type = type.scalar().Type();
var.rows = 1U;
var.columns = RDCMAX(1U, type.vector().count) & 0xff;
// it is possible to construct larger vectors from a collection of scalars and smaller
// vectors.
uint32_t dst = 0;
for(size_t i = 0; i < construct.constituents.size(); i++)
{
ShaderVariable src = GetSrc(construct.constituents[i]);
RDCASSERTEQUAL(src.rows, 1);
for(uint32_t j = 0; j < src.columns; j++)
copyComp(var, dst++, src, j);
}
}
else if(type.type == DataType::MatrixType)
{
// matrices are constructed from a list of columns
var.type = type.scalar().Type();
var.columns = RDCMAX(1U, type.matrix().count) & 0xff;
var.rows = RDCMAX(1U, type.vector().count) & 0xff;
RDCASSERTEQUAL(var.columns, construct.constituents.size());
rdcarray<ShaderVariable> columns;
columns.resize(construct.constituents.size());
for(size_t i = 0; i < construct.constituents.size(); i++)
columns[i] = GetSrc(construct.constituents[i]);
for(uint32_t r = 0; r < var.rows; r++)
for(uint32_t c = 0; c < var.columns; c++)
copyComp(var, r * var.columns + c, columns[c], r);
}
SetDst(construct.result, var);
break;
}
case Op::VectorShuffle:
{
OpVectorShuffle shuffle(it);
ShaderVariable var;
const DataType &type = debugger.GetType(shuffle.resultType);
var.type = type.scalar().Type();
var.rows = 1;
var.columns = RDCMAX(1U, (uint32_t)shuffle.components.size()) & 0xff;
ShaderVariable src1 = GetSrc(shuffle.vector1);
ShaderVariable src2 = GetSrc(shuffle.vector2);
uint32_t vec1Cols = src1.columns;
for(uint32_t i = 0; i < shuffle.components.size(); i++)
{
uint32_t c = shuffle.components[i];
// "A Component literal may also be FFFFFFFF, which means the corresponding result component
// has no source and is undefined."
// If it has no defined source, we can use 0 safely and know that it's at least going to
// index validly
if(c == ~0U)
c = 0;
if(c < vec1Cols)
copyComp(var, i, src1, c);
else
copyComp(var, i, src2, c - vec1Cols);
}
SetDst(shuffle.result, var);
break;
}
case Op::VectorExtractDynamic:
{
OpVectorExtractDynamic extract(it);
ShaderVariable var = GetSrc(extract.vector);
ShaderVariable idx = GetSrc(extract.index);
uint32_t comp = uintComp(idx, 0);
if(comp != 0)
copyComp(var, 0, var, comp);
// result is now scalar
var.columns = 1;
SetDst(extract.result, var);
break;
}
case Op::VectorInsertDynamic:
{
OpVectorInsertDynamic insert(it);
ShaderVariable var = GetSrc(insert.vector);
ShaderVariable scalar = GetSrc(insert.component);
ShaderVariable idx = GetSrc(insert.index);
uint32_t comp = uintComp(idx, 0);
copyComp(var, comp, scalar, 0);
SetDst(insert.result, var);
break;
}
case Op::Select:
{
OpSelect select(it);
// we treat this as a composite instruction for the case where the condition is a vector
ShaderVariable cond = GetSrc(select.condition);
ShaderVariable var = GetSrc(select.object1);
ShaderVariable b = GetSrc(select.object2);
if(cond.columns == 1)
{
if(uintComp(cond, 0) == 0)
var = b;
}
else
{
for(uint8_t c = 0; c < cond.columns; c++)
{
if(uintComp(cond, c) == 0)
copyComp(var, c, b, c);
}
}
SetDst(select.result, var);
break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Conversion opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::ConvertFToS:
case Op::ConvertFToU:
case Op::ConvertSToF:
case Op::ConvertUToF:
{
OpConvertFToS convert(it);
const ShaderVariable &var = GetSrc(convert.floatValue);
const DataType &resultType = debugger.GetType(convert.resultType);
ShaderVariable conv = var;
conv.type = resultType.scalar().Type();
if(opdata.op == Op::ConvertFToS)
{
for(uint8_t c = 0; c < var.columns; c++)
{
double x = 0.0;
#undef _IMPL
#define _IMPL(T) x = comp<T>(var, c);
IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, var.type);
#undef _IMPL
#define _IMPL(I, S, U) comp<S>(conv, c) = (S)x;
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, conv.type);
}
}
else if(opdata.op == Op::ConvertFToU)
{
for(uint8_t c = 0; c < var.columns; c++)
{
double x = 0.0;
#undef _IMPL
#define _IMPL(T) x = comp<T>(var, c);
IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, var.type);
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(conv, c) = (U)x;
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, conv.type);
}
}
else if(opdata.op == Op::ConvertSToF)
{
for(uint8_t c = 0; c < var.columns; c++)
{
int64_t x = 0;
#undef _IMPL
#define _IMPL(I, S, U) x = comp<S>(var, c);
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, var.type);
if(conv.type == VarType::Float)
comp<float>(conv, c) = (float)x;
else if(conv.type == VarType::Half)
comp<half_float::half>(conv, c) = (float)x;
else if(conv.type == VarType::Double)
comp<double>(conv, c) = (double)x;
}
}
else if(opdata.op == Op::ConvertUToF)
{
for(uint8_t c = 0; c < var.columns; c++)
{
uint64_t x = 0;
#undef _IMPL
#define _IMPL(I, S, U) x = comp<U>(var, c);
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, var.type);
if(conv.type == VarType::Float)
comp<float>(conv, c) = (float)x;
else if(conv.type == VarType::Half)
comp<half_float::half>(conv, c) = (float)x;
else if(conv.type == VarType::Double)
comp<double>(conv, c) = (double)x;
}
}
SetDst(convert.result, conv);
break;
}
case Op::QuantizeToF16:
{
OpQuantizeToF16 quant(it);
ShaderVariable var = GetSrc(quant.value);
ShaderVariable conv = var;
// Result Type must be a scalar or vector of floating-point type. The component width must be
// 32 bits.
conv.type = VarType::Float;
for(uint8_t c = 0; c < var.columns; c++)
setFloatComp(conv, c, ConvertFromHalf(ConvertToHalf(floatComp(var, c))));
SetDst(quant.result, conv);
break;
}
case Op::UConvert:
{
OpUConvert cast(it);
const ShaderVariable &var = GetSrc(cast.unsignedValue);
const DataType &resultType = debugger.GetType(cast.resultType);
ShaderVariable conv = var;
conv.type = resultType.scalar().Type();
RDCEraseEl(conv.value);
// this is a zero-extend or truncate. Column-wise we read the variable out into a u64 then
// cast
for(uint8_t c = 0; c < var.columns; c++)
{
uint64_t x = 0;
#undef _IMPL
#define _IMPL(I, S, U) x = comp<U>(var, c);
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, var.type);
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(conv, c) = (U)x;
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, conv.type);
}
SetDst(cast.result, conv);
break;
}
case Op::SConvert:
{
  // OpSConvert: change the bit width of a signed integer, per component. Widening sign-extends,
  // narrowing truncates.
  OpSConvert cast(it);

  const ShaderVariable &var = GetSrc(cast.signedValue);
  const DataType &resultType = debugger.GetType(cast.resultType);

  // start from a copy of the source so everything other than the type and value carries over,
  // then retype it to the result's scalar type and erase the value storage before filling it in
  ShaderVariable conv = var;
  conv.type = resultType.scalar().Type();
  RDCEraseEl(conv.value);

  // Column-wise we read the variable out as signed into an i64 (which sign-extends), then cast
  // down to the signed type matching the result width
  for(uint8_t c = 0; c < var.columns; c++)
  {
    int64_t x = 0;

#undef _IMPL
#define _IMPL(I, S, U) x = comp<S>(var, c);
    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, var.type);

#undef _IMPL
#define _IMPL(I, S, U) comp<S>(conv, c) = (S)x;
    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, conv.type);
  }

  // store the converted variable. Storing 'var' here would discard the conversion entirely and
  // leave the result with the source type and width
  SetDst(cast.result, conv);
  break;
}
case Op::FConvert:
{
OpFConvert cast(it);
const ShaderVariable &var = GetSrc(cast.floatValue);
const DataType &resultType = debugger.GetType(cast.resultType);
ShaderVariable conv = var;
conv.type = resultType.scalar().Type();
// we can safely upconvert to double as an intermediary because the IEEE format is the same.
// All we're doing effectively is sign extending the exponent and zero extending the mantissa.
for(uint8_t c = 0; c < var.columns; c++)
{
double x = 0.0;
#undef _IMPL
#define _IMPL(T) x = comp<T>(var, c);
IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, var.type);
#undef _IMPL
#define _IMPL(T) comp<T>(conv, c) = (T)x;
// IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, conv.type);
if(conv.type == VarType::Float)
comp<float>(conv, c) = (float)x;
else if(conv.type == VarType::Half)
comp<half_float::half>(conv, c) = (float)x;
else if(conv.type == VarType::Double)
comp<double>(conv, c) = (double)x;
}
SetDst(cast.result, conv);
break;
}
case Op::Bitcast:
{
OpBitcast cast(it);
const DataType &type = debugger.GetType(cast.resultType);
ShaderVariable var = GetSrc(cast.operand);
if(type.type == DataType::PointerType)
{
var = debugger.MakeTypedPointer(var.value.u64v[0], type);
}
else if((type.type == DataType::ScalarType && var.columns == 1) ||
type.vector().count == var.columns)
{
// if the column count is unchanged, just change the underlying type
var.type = type.scalar().Type();
}
else
{
uint32_t srcByteCount = 4;
if(var.type == VarType::Double || var.type == VarType::ULong || var.type == VarType::SLong)
srcByteCount = 8;
else if(var.type == VarType::Half || var.type == VarType::UShort ||
var.type == VarType::SShort)
srcByteCount = 2;
else if(var.type == VarType::UByte || var.type == VarType::SByte)
srcByteCount = 1;
uint32_t dstByteCount = type.scalar().width / 8;
uint32_t dstColumns = (type.type == DataType::ScalarType) ? 1 : type.vector().count;
// must be identical bit count
RDCASSERT(dstByteCount * dstColumns == srcByteCount * var.columns);
// because this is a bitcast, we leave var.value entirely alone. There is the same number of
// bytes so the union handles it. E.g. uv[0], uv[1] being bitcast to a single 64-bit
// corresponds exactly to the LSB and MSB of u64v[0]
var.type = type.scalar().Type();
var.columns = dstColumns & 0xff;
}
SetDst(cast.result, var);
break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Extended instruction set handling
//
//////////////////////////////////////////////////////////////////////////////
case Op::ExtInst:
{
Id result = Id::fromWord(it.word(2));
Id extinst = Id::fromWord(it.word(3));
if(global.extInsts.find(extinst) == global.extInsts.end())
{
RDCERR("Unknown extended instruction set %u", extinst.value());
break;
}
const ExtInstDispatcher &dispatch = global.extInsts[extinst];
// ignore nonsemantic instructions
if(dispatch.nonsemantic)
break;
uint32_t instruction = it.word(4);
if(instruction >= dispatch.functions.size())
{
RDCERR("Unsupported instruction %u in set %s (only %zu instructions defined)", instruction,
dispatch.name.c_str(), dispatch.functions.size());
break;
}
if(dispatch.functions[instruction] == NULL)
{
RDCWARN("Unimplemented extended instruction %s::%s", dispatch.name.c_str(),
dispatch.names[instruction].c_str());
break;
}
rdcarray<Id> params;
for(size_t i = 5; i < it.size(); i++)
params.push_back(Id::fromWord(it.word(i)));
SetDst(result, dispatch.functions[instruction](*this, instruction, params));
break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Comparison opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::LogicalEqual:
case Op::LogicalNotEqual:
case Op::LogicalOr:
case Op::LogicalAnd:
case Op::IEqual:
case Op::INotEqual:
case Op::UGreaterThan:
case Op::UGreaterThanEqual:
case Op::ULessThan:
case Op::ULessThanEqual:
case Op::SGreaterThan:
case Op::SGreaterThanEqual:
case Op::SLessThan:
case Op::SLessThanEqual:
case Op::FOrdEqual:
case Op::FOrdNotEqual:
case Op::FOrdGreaterThan:
case Op::FOrdGreaterThanEqual:
case Op::FOrdLessThan:
case Op::FOrdLessThanEqual:
case Op::FUnordEqual:
case Op::FUnordNotEqual:
case Op::FUnordGreaterThan:
case Op::FUnordGreaterThanEqual:
case Op::FUnordLessThan:
case Op::FUnordLessThanEqual:
{
  // All of these are binary operations with the same operand layout, so they are decoded with a
  // single binary-op decoder. Each component of the result is written as 1 (true) or 0 (false)
  // and the result is typed as Bool at the end.
  OpFMul compare(it);

  ShaderVariable a = GetSrc(compare.operand1);
  ShaderVariable b = GetSrc(compare.operand2);

  // the result starts as a copy of the first operand so that it has the same column count
  ShaderVariable var = a;

  if(opdata.op == Op::IEqual || opdata.op == Op::LogicalEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<I>(a, c) == comp<I>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::INotEqual || opdata.op == Op::LogicalNotEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<I>(a, c) != comp<I>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::LogicalAnd)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<I>(a, c) & comp<I>(b, c)
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::LogicalOr)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<I>(a, c) | comp<I>(b, c)
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::UGreaterThan)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(a, c) > comp<U>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::UGreaterThanEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(a, c) >= comp<U>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::ULessThan)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(a, c) < comp<U>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::ULessThanEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(a, c) <= comp<U>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::SGreaterThan)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<S>(a, c) > comp<S>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::SGreaterThanEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<S>(a, c) >= comp<S>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::SLessThan)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<S>(a, c) < comp<S>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::SLessThanEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<S>(a, c) <= comp<S>(b, c) ? 1 : 0
      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }

  // FOrd are all "Floating-point comparison if operands are ordered and Operand 1 is ... than
  // Operand 2.". FUnord are all "Floating-point comparison if operands are unordered or Operand 1
  // is ... than Operand 2.". Operands are unordered when at least one of them is NaN.
  //
  // In C++ the ==, <, <=, > and >= operators all evaluate to false if either operand is NaN. So
  // for those relations the FOrd variant is just the straight comparison (false when unordered),
  // and the FUnord variant is the negation of the *opposite* comparison, which is unchanged for
  // ordered operands and true when unordered. E.g. 'unordered or a <= b' is !(a > b).
  //
  // The exception is != which evaluates to *true* if either operand is NaN. That is exactly what
  // FUnordNotEqual wants, but it means 'ordered and not equal' can't be a plain != and 'unordered
  // or equal' can't be a negated !=. Instead 'ordered and not equal' is (a < b || a > b), which is
  // false for NaN, and 'unordered or equal' is its negation.
  if(opdata.op == Op::FOrdEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) == comp<T>(b, c)) ? 1 : 0
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FOrdNotEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) \
  comp<uint32_t>(var, c) = (comp<T>(a, c) < comp<T>(b, c) || comp<T>(a, c) > comp<T>(b, c)) ? 1 : 0
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FOrdGreaterThan)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) > comp<T>(b, c)) ? 1 : 0
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FOrdGreaterThanEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) >= comp<T>(b, c)) ? 1 : 0
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FOrdLessThan)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) < comp<T>(b, c)) ? 1 : 0
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FOrdLessThanEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) <= comp<T>(b, c)) ? 1 : 0
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }

  if(opdata.op == Op::FUnordEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
      // unordered or equal == not (strictly less or strictly greater)
#undef _IMPL
#define _IMPL(T) \
  comp<uint32_t>(var, c) = (comp<T>(a, c) < comp<T>(b, c) || comp<T>(a, c) > comp<T>(b, c)) ? 0 : 1
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FUnordNotEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) == comp<T>(b, c)) ? 0 : 1
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FUnordGreaterThan)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) <= comp<T>(b, c)) ? 0 : 1
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FUnordGreaterThanEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) < comp<T>(b, c)) ? 0 : 1
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FUnordLessThan)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) >= comp<T>(b, c)) ? 0 : 1
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FUnordLessThanEqual)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
      // unordered or a <= b == not (a > b). Negating (a <= b) here would instead compute
      // 'unordered or greater than'
#undef _IMPL
#define _IMPL(T) comp<uint32_t>(var, c) = (comp<T>(a, c) > comp<T>(b, c)) ? 0 : 1
      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }

  var.type = VarType::Bool;

  SetDst(compare.result, var);
  break;
}
case Op::LogicalNot:
{
OpLogicalNot negate(it);
ShaderVariable var = GetSrc(negate.operand);
for(uint8_t c = 0; c < var.columns; c++)
setUintComp(var, c, 1U - uintComp(var, c));
var.type = VarType::Bool;
SetDst(negate.result, var);
break;
}
case Op::Any:
case Op::All:
{
OpAny any(it);
ShaderVariable var = GetSrc(any.vector);
for(uint8_t c = 1; c < var.columns; c++)
{
if(opdata.op == Op::Any)
setUintComp(var, 0, uintComp(var, 0) | uintComp(var, c));
else
setUintComp(var, 0, uintComp(var, 0) & uintComp(var, c));
}
var.columns = 1;
SetDst(any.result, var);
break;
}
case Op::IsNan:
{
OpIsNan is(it);
ShaderVariable x = GetSrc(is.x);
ShaderVariable var = x;
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(T) setUintComp(var, c, RDCISNAN(comp<T>(x, c)) ? 1 : 0)
IMPL_FOR_FLOAT_TYPES(_IMPL);
}
var.type = VarType::Bool;
SetDst(is.result, var);
break;
}
case Op::IsInf:
{
OpIsNan is(it);
ShaderVariable x = GetSrc(is.x);
ShaderVariable var = x;
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(T) setUintComp(var, c, RDCISINF(comp<T>(x, c)) ? 1 : 0);
IMPL_FOR_FLOAT_TYPES(_IMPL);
}
var.type = VarType::Bool;
SetDst(is.result, var);
break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Bitwise/logical opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::BitCount:
{
OpBitCount bitwise(it);
const DataType &type = debugger.GetType(bitwise.resultType);
ShaderVariable var = GetSrc(bitwise.base);
ShaderVariable ret = var;
ret.type = type.scalar().Type();
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) setUintComp(ret, c, (uint32_t)Bits::CountOnes(comp<U>(var, c)));
IMPL_FOR_INT_TYPES(_IMPL);
}
SetDst(bitwise.result, ret);
break;
}
case Op::BitReverse:
{
  // OpBitReverse: per component, bit n of the result is taken from bit (Width - 1 - n) of Base,
  // where Width is the bit width of the component type.
  OpBitReverse bitwise(it);

  ShaderVariable var = GetSrc(bitwise.base);

  for(uint8_t c = 0; c < var.columns; c++)
  {
    // the width comes from the unsigned type U that the macro is instantiated with, rather than
    // being fixed at 32. With a fixed 32, narrower components would have their reversed bits
    // shifted out of range and 64-bit components would only have their low half reversed.
    // For 32-bit components this is identical to a fixed 32-bit reversal.
#undef _IMPL
#define _IMPL(I, S, U)                                    \
  const uint32_t numBits = uint32_t(sizeof(U) * 8);       \
  const U v = comp<U>(var, c);                            \
  U reversed = 0;                                         \
  for(uint32_t b = 0; b < numBits; b++)                   \
  {                                                       \
    const U bit = U((v >> b) & U(1));                     \
    reversed = U(reversed | U(bit << (numBits - 1 - b))); \
  }                                                       \
  comp<U>(var, c) = reversed;
    IMPL_FOR_INT_TYPES(_IMPL);
  }

  SetDst(bitwise.result, var);
  break;
}
case Op::BitFieldUExtract:
case Op::BitFieldSExtract:
{
OpBitFieldUExtract bitwise(it);
ShaderVariable var = GetSrc(bitwise.base);
ShaderVariable offset = GetSrc(bitwise.offset);
ShaderVariable count = GetSrc(bitwise.count);
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) \
const U mask = (U(1) << comp<U>(count, c)) - U(1); \
\
comp<U>(var, c) >>= comp<U>(offset, c); \
comp<U>(var, c) &= mask; \
\
if(opdata.op == Op::BitFieldSExtract) \
{ \
U topbit = (mask + U(1)) >> U(1); \
if(comp<U>(var, c) & topbit) \
comp<U>(var, c) |= (~0ULL ^ mask); \
}
IMPL_FOR_INT_TYPES(_IMPL);
}
SetDst(bitwise.result, var);
break;
}
case Op::BitFieldInsert:
{
OpBitFieldInsert bitwise(it);
ShaderVariable var = GetSrc(bitwise.base);
ShaderVariable insert = GetSrc(bitwise.insert);
ShaderVariable offset = GetSrc(bitwise.offset);
ShaderVariable count = GetSrc(bitwise.count);
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) \
const U mask = (U(1) << comp<U>(count, c)) - U(1); \
\
comp<U>(var, c) &= ~(mask << comp<U>(offset, c)); \
comp<U>(var, c) |= (comp<U>(insert, c) & mask) << comp<U>(offset, c);
IMPL_FOR_INT_TYPES(_IMPL);
}
SetDst(bitwise.result, var);
break;
}
case Op::BitwiseOr:
case Op::BitwiseAnd:
case Op::BitwiseXor:
case Op::ShiftLeftLogical:
case Op::ShiftRightArithmetic:
case Op::ShiftRightLogical:
{
OpBitwiseOr bitwise(it);
ShaderVariable var = GetSrc(bitwise.operand1);
ShaderVariable b = GetSrc(bitwise.operand2);
if(opdata.op == Op::BitwiseOr)
{
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(var, c) | comp<U>(b, c)
IMPL_FOR_INT_TYPES(_IMPL);
}
}
else if(opdata.op == Op::BitwiseAnd)
{
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(var, c) & comp<U>(b, c)
IMPL_FOR_INT_TYPES(_IMPL);
}
}
else if(opdata.op == Op::BitwiseXor)
{
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(var, c) ^ comp<U>(b, c)
IMPL_FOR_INT_TYPES(_IMPL);
}
}
else if(opdata.op == Op::ShiftLeftLogical)
{
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(var, c) << comp<U>(b, c)
IMPL_FOR_INT_TYPES(_IMPL);
}
}
else if(opdata.op == Op::ShiftRightArithmetic)
{
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) comp<S>(var, c) = comp<S>(var, c) >> comp<S>(b, c)
IMPL_FOR_INT_TYPES(_IMPL);
}
}
else if(opdata.op == Op::ShiftRightLogical)
{
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(var, c) >> comp<U>(b, c)
IMPL_FOR_INT_TYPES(_IMPL);
}
}
SetDst(bitwise.result, var);
break;
}
case Op::GroupNonUniformBitwiseOr:
{
OpGroupNonUniformBitwiseOr group(it);
ShaderVariable var;
for(size_t i = 0; i < workgroup.size(); i++)
{
if(i == 0)
{
var = workgroup[i].GetSrc(group.value);
}
else
{
ShaderVariable b = workgroup[i].GetSrc(group.value);
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = comp<U>(var, c) | comp<U>(b, c)
IMPL_FOR_INT_TYPES(_IMPL);
}
}
}
SetDst(group.result, var);
break;
}
case Op::Not:
{
OpNot bitwise(it);
ShaderVariable var = GetSrc(bitwise.operand);
for(uint8_t c = 0; c < var.columns; c++)
{
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(var, c) = ~comp<U>(var, c)
IMPL_FOR_INT_TYPES(_IMPL);
}
SetDst(bitwise.result, var);
break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Mathematical opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::FMul:
case Op::FDiv:
case Op::FMod:
case Op::FRem:
case Op::FAdd:
case Op::FSub:
case Op::IMul:
case Op::SDiv:
case Op::UDiv:
case Op::UMod:
case Op::SMod:
case Op::SRem:
case Op::IAdd:
case Op::ISub:
{
  // Component-wise binary arithmetic. Every opcode in this group is decoded through OpFMul and
  // read as (result, operand1, operand2). The result starts as a copy of operand1 and is updated
  // in place one column at a time; the IMPL_FOR_*_TYPES helpers select the concrete component
  // type (presumably from var.type).
  OpFMul math(it);

  ShaderVariable var = GetSrc(math.operand1);
  ShaderVariable b = GetSrc(math.operand2);

  if(opdata.op == Op::FMul)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<T>(var, c) *= comp<T>(b, c)

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FDiv)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<T>(var, c) /= comp<T>(b, c)

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FMod)
  {
    // OpFMod: the remainder takes the sign of operand2. fmod() gives a remainder with the sign of
    // operand1, so shift it by |operand2| when the signs disagree. An exactly-zero remainder is
    // left alone - shifting it would produce +/-|operand2| instead of 0.
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T)                                 \
  T af = comp<T>(var, c), bf = comp<T>(b, c);    \
  comp<T>(var, c) = fmod(af, bf);                \
  if(comp<T>(var, c) < 0.0f && bf >= 0.0f)       \
    comp<T>(var, c) += fabs(bf);                 \
  else if(comp<T>(var, c) > 0.0f && bf < 0.0f)   \
    comp<T>(var, c) -= fabs(bf);

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FRem)
  {
    // OpFRem: the remainder takes the sign of operand1, which is exactly what fmod() returns, so
    // no sign fixup is needed (a fixup here would turn an exact-zero remainder into -|operand2|).
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T)                                 \
  T af = comp<T>(var, c), bf = comp<T>(b, c);    \
  comp<T>(var, c) = fmod(af, bf);

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FAdd)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<T>(var, c) += comp<T>(b, c)

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::FSub)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(T) comp<T>(var, c) -= comp<T>(b, c)

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::IMul)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<I>(var, c) *= comp<I>(b, c)

      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::SDiv)
  {
    // Division by zero yields 0 and flags the event on the debug state, if there is one.
    // Division by -1 is done as a wrapping negate on the unsigned view, because the most negative
    // value divided by -1 overflows the signed type (undefined behaviour, and a trap on x86).
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U)                                    \
  if(comp<S>(b, c) == 0)                                  \
  {                                                       \
    comp<U>(var, c) = 0;                                  \
    if(m_State)                                           \
      m_State->flags |= ShaderEvents::GeneratedNanOrInf;  \
  }                                                       \
  else if(comp<S>(b, c) == -1)                            \
  {                                                       \
    comp<U>(var, c) = U(U(0) - comp<U>(var, c));          \
  }                                                       \
  else                                                    \
  {                                                       \
    comp<S>(var, c) /= comp<S>(b, c);                     \
  }

      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::UDiv)
  {
    // division by zero yields 0 and flags the event on the debug state, if there is one
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U)                                    \
  if(comp<U>(b, c) != 0)                                  \
  {                                                       \
    comp<U>(var, c) /= comp<U>(b, c);                     \
  }                                                       \
  else                                                    \
  {                                                       \
    comp<U>(var, c) = 0;                                  \
    if(m_State)                                           \
      m_State->flags |= ShaderEvents::GeneratedNanOrInf;  \
  }

      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::UMod)
  {
    // modulo by zero yields 0 and flags the event on the debug state, if there is one
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U)                                    \
  if(comp<U>(b, c) != 0)                                  \
  {                                                       \
    comp<U>(var, c) %= comp<U>(b, c);                     \
  }                                                       \
  else                                                    \
  {                                                       \
    comp<U>(var, c) = 0;                                  \
    if(m_State)                                           \
      m_State->flags |= ShaderEvents::GeneratedNanOrInf;  \
  }

      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::SRem || opdata.op == Op::SMod)
  {
    // OpSRem: remainder with the sign of operand1 - this is what the C++ % operator gives.
    // OpSMod: remainder with the sign of operand2 - start from % and add operand2 back when the
    // remainder is non-zero and its sign differs from operand2's.
    // A divisor of 0 yields 0 and flags the event. A divisor of -1 always has remainder 0 and is
    // special-cased so the most negative value never reaches the overflowing % -1.
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U)                                                   \
  if(comp<S>(b, c) == 0)                                                 \
  {                                                                      \
    comp<S>(var, c) = 0;                                                 \
    if(m_State)                                                          \
      m_State->flags |= ShaderEvents::GeneratedNanOrInf;                 \
  }                                                                      \
  else if(comp<S>(b, c) == -1)                                           \
  {                                                                      \
    comp<S>(var, c) = 0;                                                 \
  }                                                                      \
  else                                                                   \
  {                                                                      \
    comp<S>(var, c) %= comp<S>(b, c);                                    \
    if(opdata.op == Op::SMod && comp<S>(var, c) != 0 &&                  \
       ((comp<S>(var, c) < 0) != (comp<S>(b, c) < 0)))                   \
      comp<S>(var, c) += comp<S>(b, c);                                  \
  }

      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::IAdd)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<I>(var, c) += comp<I>(b, c)

      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }
  else if(opdata.op == Op::ISub)
  {
    for(uint8_t c = 0; c < var.columns; c++)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<I>(var, c) -= comp<I>(b, c)

      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }

  SetDst(math.result, var);
  break;
}
// extended math ops
// Each of these writes a two-member struct result: member 0 is named "lsb" and member 1 is named
// "msb". All four opcodes are decoded through OpUMulExtended.
case Op::UMulExtended:
case Op::SMulExtended:
case Op::IAddCarry:
case Op::ISubBorrow:
{
OpUMulExtended math(it);
ShaderVariable a = GetSrc(math.operand1);
ShaderVariable b = GetSrc(math.operand2);
// both halves start as copies of operand1, so they carry its type and column count. If a branch
// below does not write them (the unsupported 64-bit multiplies) they keep operand1's value.
ShaderVariable lsb = a;
ShaderVariable msb = a;
uint32_t elemSize = VarTypeByteSize(a.type);
uint32_t elemBits = elemSize * 8;
if(opdata.op == Op::UMulExtended)
{
// if this is less than 64-bit precision inputs, we can just upcast, do the mul, and then
// mask off the bits we care about
if(elemSize < 8)
{
// mask covering the low elemBits bits of the 64-bit product
uint32_t mask = 0xFFFFFFFFu >> (32 - elemBits);
for(uint8_t c = 0; c < a.columns; c++)
{
const uint64_t x = uintComp(a, c);
const uint64_t y = uintComp(b, c);
const uint64_t res = x * y;
setUintComp(lsb, c, uint32_t(res & mask));
setUintComp(msb, c, uint32_t(res >> elemBits));
}
}
else
{
RDCERR("Unsupported UMulExtended on 64-bit operands");
}
}
else if(opdata.op == Op::SMulExtended)
{
// same approach as the unsigned multiply, but the operands are widened as signed values
if(elemSize < 8)
{
uint32_t mask = 0xFFFFFFFFu >> (32 - elemBits);
for(uint8_t c = 0; c < a.columns; c++)
{
const int64_t x = intComp(a, c);
const int64_t y = intComp(b, c);
const int64_t res = x * y;
setIntComp(lsb, c, int32_t(res & mask));
// NOTE(review): this relies on >> of a negative int64_t being an arithmetic shift
setIntComp(msb, c, int32_t(res >> elemBits));
}
}
else
{
RDCERR("Unsupported SMulExtended on 64-bit operands");
}
}
else if(opdata.op == Op::IAddCarry)
{
for(uint8_t c = 0; c < a.columns; c++)
{
// unsigned overflow is well-defined to wrap around, giving us the lsb we want.
// if the result is less than one of the operands, we overflowed so set msb
#undef _IMPL
#define _IMPL(I, S, U) \
comp<U>(lsb, c) = comp<U>(a, c) + comp<U>(b, c); \
comp<U>(msb, c) = (comp<U>(lsb, c) < comp<U>(b, c)) ? 1 : 0;
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, a.type);
}
}
else if(opdata.op == Op::ISubBorrow)
{
for(uint8_t c = 0; c < a.columns; c++)
{
// if b <= a we don't need to borrow, otherwise set the borrow bit
// in the borrow case the stored difference is the all-ones value minus (b - a - 1), which is
// a - b wrapped to the component width once it is truncated on assignment
#undef _IMPL
#define _IMPL(I, S, U) \
if(comp<U>(b, c) <= comp<U>(a, c)) \
{ \
comp<U>(msb, c) = 0; \
comp<U>(lsb, c) = comp<U>(a, c) - comp<U>(b, c); \
} \
else \
{ \
comp<U>(msb, c) = 1; \
comp<U>(lsb, c) = ~0ULL - (comp<U>(b, c) - comp<U>(a, c) - 1U); \
}
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, a.type);
}
}
// package the two halves into the struct that becomes the instruction's result
ShaderVariable result;
result.rows = 1;
result.columns = 1;
result.type = VarType::Struct;
result.members = {lsb, msb};
result.members[0].name = "lsb";
result.members[1].name = "msb";
SetDst(math.result, result);
break;
}
case Op::FNegate:
case Op::SNegate:
{
  // both negations are decoded through OpFNegate; they differ only in whether each component is
  // negated as a float or as a signed integer
  OpFNegate neg(it);

  ShaderVariable var = GetSrc(neg.operand);

  for(uint8_t c = 0; c < var.columns; c++)
  {
    if(opdata.op == Op::FNegate)
    {
#undef _IMPL
#define _IMPL(T) comp<T>(var, c) = -comp<T>(var, c)

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
    else if(opdata.op == Op::SNegate)
    {
#undef _IMPL
#define _IMPL(I, S, U) comp<S>(var, c) = -comp<S>(var, c)

      IMPL_FOR_INT_TYPES(_IMPL);
    }
  }

  SetDst(neg.result, var);
  break;
}
case Op::Dot:
{
  // floating point dot product: sum the per-component products into component 0 and shrink the
  // result to a single column
  OpDot dot(it);

  ShaderVariable var = GetSrc(dot.vector1);
  ShaderVariable b = GetSrc(dot.vector2);

  RDCASSERTEQUAL(var.columns, b.columns);

#undef _IMPL
#define _IMPL(T)                              \
  T sum(0.0);                                 \
  for(uint8_t i = 0; i < var.columns; i++)    \
    sum += comp<T>(var, i) * comp<T>(b, i);   \
  comp<T>(var, 0) = sum;

  IMPL_FOR_FLOAT_TYPES(_IMPL);

  var.columns = 1;

  SetDst(dot.result, var);
  break;
}
case Op::VectorTimesScalar:
{
  // scale every component of the vector by component 0 of the scalar operand
  OpVectorTimesScalar scale(it);

  ShaderVariable var = GetSrc(scale.vector);
  ShaderVariable factor = GetSrc(scale.scalar);

  for(uint8_t col = 0; col < var.columns; col++)
  {
#undef _IMPL
#define _IMPL(T) comp<T>(var, col) *= comp<T>(factor, 0)

    IMPL_FOR_FLOAT_TYPES(_IMPL);
  }

  SetDst(scale.result, var);
  break;
}
case Op::MatrixTimesScalar:
{
  // scale every element of the matrix (rows * columns of them, addressed as a flat array) by
  // component 0 of the scalar operand
  OpMatrixTimesScalar scale(it);

  ShaderVariable var = GetSrc(scale.matrix);
  ShaderVariable factor = GetSrc(scale.scalar);

  for(uint8_t elem = 0; elem < var.rows * var.columns; elem++)
  {
#undef _IMPL
#define _IMPL(T) comp<T>(var, elem) *= comp<T>(factor, 0)

    IMPL_FOR_FLOAT_TYPES(_IMPL);
  }

  SetDst(scale.result, var);
  break;
}
case Op::VectorTimesMatrix:
{
  OpVectorTimesMatrix mul(it);

  ShaderVariable matrix = GetSrc(mul.matrix);
  ShaderVariable vector = GetSrc(mul.vector);

  // the result is seeded from the vector operand and resized to one component per matrix column
  ShaderVariable var = vector;
  var.columns = matrix.columns;

  const DataType &type = debugger.GetType(mul.resultType);
  RDCASSERTEQUAL(type.vector().count, var.columns);
  RDCASSERTEQUAL(matrix.rows, vector.columns);

  // result[col] = sum over rows of matrix[row][col] * vector[row]
  for(uint8_t col = 0; col < matrix.columns; col++)
  {
#undef _IMPL
#define _IMPL(T)                                   \
  comp<T>(var, col) = 0.0;                         \
  for(uint8_t row = 0; row < matrix.rows; row++)   \
    comp<T>(var, col) += comp<T>(matrix, row * matrix.columns + col) * comp<T>(vector, row);

    IMPL_FOR_FLOAT_TYPES(_IMPL);
  }

  SetDst(mul.result, var);
  break;
}
case Op::Transpose:
{
  OpTranspose transpose(it);

  ShaderVariable matrix = GetSrc(transpose.matrix);

  // start from a copy with the dimensions exchanged, then fill each destination element from
  // the mirrored source element
  ShaderVariable var = matrix;
  std::swap(var.rows, var.columns);

  for(uint8_t dstRow = 0; dstRow < var.rows; dstRow++)
  {
    for(uint8_t dstCol = 0; dstCol < var.columns; dstCol++)
    {
#undef _IMPL
#define _IMPL(T) \
  comp<T>(var, dstRow * var.columns + dstCol) = comp<T>(matrix, dstCol * matrix.columns + dstRow)

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }

  SetDst(transpose.result, var);
  break;
}
case Op::MatrixTimesVector:
{
  OpMatrixTimesVector mul(it);

  ShaderVariable matrix = GetSrc(mul.matrix);
  ShaderVariable vector = GetSrc(mul.vector);

  // the result is seeded from the vector operand and resized to one component per matrix row
  ShaderVariable var = vector;
  var.columns = matrix.rows;

  const DataType &type = debugger.GetType(mul.resultType);
  RDCASSERTEQUAL(type.vector().count, var.columns);
  RDCASSERTEQUAL(matrix.columns, vector.columns);

  // result[row] = sum over columns of matrix[row][col] * vector[col]
  for(uint8_t row = 0; row < matrix.rows; row++)
  {
#undef _IMPL
#define _IMPL(T)                                      \
  comp<T>(var, row) = 0.0;                            \
  for(uint8_t col = 0; col < matrix.columns; col++)   \
    comp<T>(var, row) += comp<T>(matrix, row * matrix.columns + col) * comp<T>(vector, col);

    IMPL_FOR_FLOAT_TYPES(_IMPL);
  }

  SetDst(mul.result, var);
  break;
}
case Op::MatrixTimesMatrix:
{
  OpMatrixTimesMatrix mul(it);

  ShaderVariable left = GetSrc(mul.leftMatrix);
  ShaderVariable right = GetSrc(mul.rightMatrix);

  // the result is a copy of the left matrix resized to (left rows) x (right columns)
  ShaderVariable var = left;
  var.rows = left.rows;
  var.columns = right.columns;

  RDCASSERTEQUAL(left.columns, right.rows);

  // result[i][j] = sum over k of left[i][k] * right[k][j]
  for(uint8_t i = 0; i < var.rows; i++)
  {
    for(uint8_t j = 0; j < var.columns; j++)
    {
#undef _IMPL
#define _IMPL(T)                                 \
  T &acc = comp<T>(var, i * var.columns + j);    \
  acc = 0.0;                                     \
                                                 \
  for(uint8_t k = 0; k < right.rows; k++)        \
    acc += comp<T>(left, i * left.columns + k) * comp<T>(right, k * right.columns + j);

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }

  SetDst(mul.result, var);
  break;
}
case Op::OuterProduct:
{
  OpOuterProduct mul(it);

  ShaderVariable left = GetSrc(mul.vector1);
  ShaderVariable right = GetSrc(mul.vector2);

  // the result has one row per component of the first vector and one column per component of
  // the second
  ShaderVariable var = left;
  var.rows = left.columns;
  var.columns = right.columns;

  // result[i][j] = left[i] * right[j]
  for(uint8_t i = 0; i < var.rows; i++)
  {
    for(uint8_t j = 0; j < var.columns; j++)
    {
#undef _IMPL
#define _IMPL(T) comp<T>(var, i * var.columns + j) = comp<T>(left, i) * comp<T>(right, j);

      IMPL_FOR_FLOAT_TYPES(_IMPL);
    }
  }

  SetDst(mul.result, var);
  break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Image opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::SampledImage:
{
  OpSampledImage sampled(it);

  // represent the combination as a two-member struct: the image first, then the sampler
  ShaderVariable combined;
  combined.type = VarType::Struct;
  combined.rows = 1;
  combined.columns = 1;
  combined.members = {GetSrc(sampled.image), GetSrc(sampled.sampler)};
  combined.members[0].name = "image";
  combined.members[1].name = "sampler";

  SetDst(opdata.result, combined);
  break;
}
case Op::Image:
{
  OpImage image(it);

  ShaderVariable var = GetSrc(image.sampledImage);

  // a variable with members is a combined struct, so extract its first member (the image).
  // Anything else is just a reference to a binding, which is passed through untouched.
  const bool isCombined = !var.members.empty();
  if(isCombined)
    var = var.members[0];

  SetDst(image.result, var);
  break;
}
// All of the image sampling, fetching, gathering and querying opcodes funnel into one
// CalculateSampleGather call on the API wrapper. The per-opcode branches below only decode the
// instruction and collect whichever of these inputs it provides.
case Op::ImageQueryLevels:
case Op::ImageQuerySamples:
case Op::ImageQuerySize:
case Op::ImageQuerySizeLod:
case Op::ImageFetch:
case Op::ImageGather:
case Op::ImageDrefGather:
case Op::ImageQueryLod:
case Op::ImageSampleExplicitLod:
case Op::ImageSampleImplicitLod:
case Op::ImageSampleDrefExplicitLod:
case Op::ImageSampleDrefImplicitLod:
case Op::ImageSampleProjExplicitLod:
case Op::ImageSampleProjImplicitLod:
case Op::ImageSampleProjDrefExplicitLod:
case Op::ImageSampleProjDrefImplicitLod:
{
// inputs gathered from the instruction. Anything an opcode doesn't provide stays
// default-constructed.
ShaderVariable img;
ShaderVariable sampler;
ShaderVariable uv;
ShaderVariable ddxCalc;
ShaderVariable ddyCalc;
ShaderVariable compare;
ImageOperandsAndParamDatas operands;
GatherChannel gather = GatherChannel::Red;
// ID of the coordinate to take derivatives of. Only set by the implicit-LOD sampling opcodes and
// by ImageQueryLod.
Id derivId;
if(opdata.op == Op::ImageFetch)
{
// fetch takes a plain image and has no sampler
OpImageFetch image(it);
img = GetSrc(image.image);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
}
else if(opdata.op == Op::ImageGather)
{
OpImageGather image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
gather = GatherChannel(uintComp(GetSrc(image.component), 0));
operands = image.imageOperands;
}
else if(opdata.op == Op::ImageDrefGather)
{
OpImageDrefGather image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
gather = GatherChannel::Red;
compare = GetSrc(image.dref);
}
else if(opdata.op == Op::ImageQueryLod)
{
OpImageQueryLod image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
derivId = image.coordinate;
}
else if(opdata.op == Op::ImageSampleExplicitLod)
{
OpImageSampleExplicitLod image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
}
else if(opdata.op == Op::ImageSampleImplicitLod)
{
OpImageSampleImplicitLod image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
derivId = image.coordinate;
}
else if(opdata.op == Op::ImageSampleDrefExplicitLod)
{
OpImageSampleDrefExplicitLod image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
compare = GetSrc(image.dref);
}
else if(opdata.op == Op::ImageSampleDrefImplicitLod)
{
OpImageSampleDrefImplicitLod image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
compare = GetSrc(image.dref);
derivId = image.coordinate;
}
else if(opdata.op == Op::ImageSampleProjExplicitLod)
{
OpImageSampleProjExplicitLod image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
}
else if(opdata.op == Op::ImageSampleProjImplicitLod)
{
OpImageSampleProjImplicitLod image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
derivId = image.coordinate;
}
else if(opdata.op == Op::ImageSampleProjDrefExplicitLod)
{
OpImageSampleProjDrefExplicitLod image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
compare = GetSrc(image.dref);
}
else if(opdata.op == Op::ImageSampleProjDrefImplicitLod)
{
OpImageSampleProjDrefImplicitLod image(it);
sampler = img = GetSrc(image.sampledImage);
uv = GetSrc(image.coordinate);
operands = image.imageOperands;
compare = GetSrc(image.dref);
derivId = image.coordinate;
}
else if(opdata.op == Op::ImageQueryLevels || opdata.op == Op::ImageQuerySamples ||
opdata.op == Op::ImageQuerySize)
{
// these opcodes are all identical, they just query a property of the image
OpImageQueryLevels query(it);
img = GetSrc(query.image);
}
else if(opdata.op == Op::ImageQuerySizeLod)
{
// the queried level is passed along through the image operands' LOD
OpImageQuerySizeLod query(it);
img = GetSrc(query.image);
operands.setLod(query.levelofDetail);
}
if(derivId != Id())
{
// calculate DDX/DDY in coarse fashion
ddxCalc = CalcDeriv(DDX, Coarse, workgroup, derivId);
ddyCalc = CalcDeriv(DDY, Coarse, workgroup, derivId);
}
// if we have a dynamically combined image sampler, split it up here
// (member 0 is taken as the image and member 1 as the sampler)
if(!img.members.empty() && !sampler.members.empty())
{
img = img.members[0];
sampler = sampler.members[1];
}
const DataType &resultType = debugger.GetType(opdata.resultType);
RDCASSERT(img.type == VarType::ReadOnlyResource || img.type == VarType::ReadWriteResource);
RDCASSERT(sampler.type == VarType::Unknown || sampler.type == VarType::ReadOnlyResource ||
sampler.type == VarType::Sampler);
// at setup time we stored the texture type for easy access here
DebugAPIWrapper::TextureType texType = debugger.GetTextureType(img);
// should not be sampling or fetching from subpass textures
RDCASSERT((texType & DebugAPIWrapper::Subpass_Texture) == 0);
ShaderVariable result;
result.type = resultType.scalar().Type();
// the sampler bind index is only looked up when a sampler variable was actually provided,
// otherwise it stays default-constructed
ShaderBindIndex samplerIndex;
if(sampler.type == VarType::Sampler || sampler.type == VarType::ReadOnlyResource)
samplerIndex = sampler.GetBindIndex();
if(!debugger.GetAPIWrapper()->CalculateSampleGather(
*this, opdata.op, texType, img.GetBindIndex(), samplerIndex, uv, ddxCalc, ddyCalc,
compare, gather, operands, result))
{
// sample failed. Fall back to whatever set0001 fills in (presumably 0, 0, 0, 1 - confirm)
set0001(result);
}
// the shape comes from the declared result type: a single row with at least one column
result.rows = 1;
result.columns = RDCMAX(1U, resultType.vector().count) & 0xff;
SetDst(opdata.result, result);
break;
}
// Reads a texel from an image. Subpass textures are read through CalculateSampleGather as an
// ImageFetch at a position-relative coordinate; everything else goes through ReadTexel.
case Op::ImageRead:
{
OpImageRead read(it);
ShaderVariable img = GetSrc(read.image);
ShaderVariable coord = GetSrc(read.coordinate);
const DataType &resultType = debugger.GetType(opdata.resultType);
// only the sample operand should be here
RDCASSERT((read.imageOperands.flags & ImageOperands::Sample) == read.imageOperands.flags);
// the result is shaped from the declared result type: one row, at least one column
ShaderVariable result;
result.type = resultType.scalar().Type();
result.rows = 1;
result.columns = RDCMAX(1U, resultType.vector().count) & 0xff;
DebugAPIWrapper::TextureType texType = debugger.GetTextureType(img);
if(texType & DebugAPIWrapper::Subpass_Texture)
{
// get current position
ShaderVariable curCoord(rdcstr(), 0.0f, 0.0f, 0.0f, 0.0f);
debugger.GetAPIWrapper()->FillInputValue(curCoord, ShaderBuiltin::Position, 0, 0);
// co-ords are relative to the current position
// (the float position is truncated to an unsigned integer before being added)
setUintComp(coord, 0, uintComp(coord, 0) + (uint32_t)floatComp(curCoord, 0));
setUintComp(coord, 1, uintComp(coord, 1) + (uint32_t)floatComp(curCoord, 1));
// do it with samplegather as ImageFetch rather than a Read which caches the whole texture
// on the CPU for no reason (since we can't write to it)
if(!debugger.GetAPIWrapper()->CalculateSampleGather(
*this, Op::ImageFetch, texType, img.GetBindIndex(), ShaderBindIndex(), coord,
ShaderVariable(), ShaderVariable(), ShaderVariable(), GatherChannel::Red,
ImageOperandsAndParamDatas(), result))
{
// sample failed. Fall back to whatever set0001 fills in (presumably 0, 0, 0, 1 - confirm)
set0001(result);
}
}
else
{
// the sample index comes from the Sample image operand when present, otherwise it is 0
if(!debugger.GetAPIWrapper()->ReadTexel(img.GetBindIndex(), coord,
read.imageOperands.flags & ImageOperands::Sample
? uintComp(GetSrc(read.imageOperands.sample), 0)
: 0,
result))
{
// read failed. Fall back to whatever set0001 fills in (presumably 0, 0, 0, 1 - confirm)
set0001(result);
}
}
SetDst(read.result, result);
break;
}
case Op::ImageWrite:
{
  OpImageWrite write(it);

  ShaderVariable img = GetSrc(write.image);
  ShaderVariable coord = GetSrc(write.coordinate);
  ShaderVariable texel = GetSrc(write.texel);

  // the only image operand expected on a write is the sample index
  RDCASSERT((write.imageOperands.flags & ImageOperands::Sample) == write.imageOperands.flags);

  // forward the texel to the API wrapper; the sample index is taken from the Sample operand
  // when it is present and is 0 otherwise
  debugger.GetAPIWrapper()->WriteTexel(
      img.GetBindIndex(), coord,
      write.imageOperands.flags & ImageOperands::Sample
          ? uintComp(GetSrc(write.imageOperands.sample), 0)
          : 0,
      texel);
  break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Block flow control opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::MemoryBarrier:
case Op::ControlBarrier:
{
  // barriers are currently a no-op in the debugger
  break;
}
case Op::Label:
case Op::SelectionMerge:
case Op::LoopMerge:
{
  // these are expected to always be jumped past rather than stepped onto, so executing one is
  // reported as an error
  RDCERR("Unexpected %s", ToStr(opdata.op).c_str());
  break;
}
case Op::Switch:
{
  // decode as the 32-bit form first: the selector and default label are common between the
  // 32-bit and 64-bit versions of OpSwitch
  OpSwitch32 switch32(it);

  Id selectorId = switch32.selector;

  // start from the default label, and replace it if one of the literals matches
  Id targetLabel = switch32.def;

  ShaderVariable selector = GetSrc(selectorId);

  // a 64-bit selector means the literals are 64-bit too, so the instruction must be re-decoded
  const bool wideLiterals =
      (selector.type == VarType::SLong) || (selector.type == VarType::ULong);

  if(wideLiterals)
  {
    OpSwitch64 switch64(it);

    const uint64_t selectorVal = selector.value.u64v[0];

    for(size_t idx = 0; idx < switch64.targets.size(); ++idx)
    {
      SwitchPairU64LiteralId candidate = switch64.targets[idx];
      if(selectorVal == candidate.literal)
      {
        targetLabel = candidate.target;
        break;
      }
    }
  }
  else
  {
    const uint32_t selectorVal = uintComp(selector, 0);

    for(size_t idx = 0; idx < switch32.targets.size(); ++idx)
    {
      SwitchPairU32LiteralId candidate = switch32.targets[idx];
      if(selectorVal == candidate.literal)
      {
        targetLabel = candidate.target;
        break;
      }
    }
  }

  JumpToLabel(targetLabel);
  break;
}
case Op::Branch:
{
  // unconditional jump to the target block
  OpBranch jump(it);
  JumpToLabel(jump.targetLabel);
  break;
}
case Op::BranchConditional:
{
  OpBranchConditional branch(it);

  // a non-zero first component of the condition selects the true label
  if(uintComp(GetSrc(branch.condition), 0))
  {
    JumpToLabel(branch.trueLabel);
  }
  else
  {
    JumpToLabel(branch.falseLabel);
  }
  break;
}
case Op::Phi:
{
  OpPhi phi(it);

  ShaderVariable var;

  StackFrame *frame = callstack.back();

  // choose the value paired with the block that was executed last in the current frame
  for(const PairIdRefIdRef &incoming : phi.parents)
  {
    if(incoming.second != frame->lastBlock)
      continue;

    var = GetSrc(incoming.first);
    break;
  }

  // one of the OpPhi's parents should have matched the block we came from
  RDCASSERT(!var.name.empty());

  SetDst(phi.result, var);
  break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Misc opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::CopyObject:
case Op::CopyLogical:
{
  // the two opcodes differ only in type offsets/decorations, which don't matter here, so both
  // are decoded as OpCopyObject and implemented as a plain value copy
  OpCopyObject cp(it);
  SetDst(cp.result, GetSrc(cp.operand));
  break;
}
case Op::ReadClockKHR:
{
  const DataType &resultType = debugger.GetType(opdata.resultType);

  // shape the result from the declared result type: one row, at least one column
  ShaderVariable clockVal;
  clockVal.rows = 1;
  clockVal.columns = RDCMAX(1U, resultType.vector().count) & 0xff;
  clockVal.type = resultType.scalar().Type();

  // the full 64-bit value is written regardless of the type. A 64-bit integer result reads it
  // natively, and a 2-vector of uint32_t picks up the lsb/msb through the value union.
  clockVal.value.u64v[0] = global.clock;

  SetDst(opdata.result, clockVal);
  break;
}
case Op::IsHelperInvocationEXT:
{
  // report this thread's helper flag as a single boolean component
  ShaderVariable isHelper;
  isHelper.rows = 1;
  isHelper.columns = 1;
  isHelper.type = VarType::Bool;
  setUintComp(isHelper, 0, helperInvocation ? 1 : 0);

  SetDst(opdata.result, isHelper);
  break;
}
case Op::DemoteToHelperInvocationEXT:
{
  // from here on this thread reports itself as a helper invocation
  helperInvocation = true;
  break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Function flow control opcodes
//
//////////////////////////////////////////////////////////////////////////////
// Function calls are executed in two visits to the same instruction, distinguished by whether
// returnValue currently has a name.
case Op::FunctionCall:
{
OpFunctionCall call(it);
// we hit this twice. The first time we don't have a return value so we jump into the
// function. The second time we do have a return value so we process it and continue
if(returnValue.name.empty())
{
// nextInstruction was already advanced past this call, so step back one to get the call itself.
// It is stored on the new frame so the Return handling can come back here.
uint32_t returnInstruction = nextInstruction - 1;
nextInstruction = debugger.GetInstructionForFunction(call.function);
EnterFunction(call.arguments);
RDCASSERT(callstack.back()->function == call.function);
callstack.back()->funcCallInstruction = returnInstruction;
}
else
{
// second visit: publish the pending return value as the call's result, then clear the name so
// the next call starts from the first-visit state
SetDst(call.result, returnValue);
returnValue.name.clear();
}
break;
}
case Op::Unreachable:
// reaching OpUnreachable is logged as an error and then handled exactly like a kill
RDCERR("Op::Unreachable reached, terminating debugging!");
DELIBERATE_FALLTHROUGH();
case Op::TerminateInvocation:
case Op::Kill:
{
// mark the thread as killed
killed = true;
// destroy all stack frames
// (the frames are owned through raw pointers, so each one is deleted before the list is cleared)
for(StackFrame *exitingFrame : callstack)
delete exitingFrame;
callstack.clear();
break;
}
// Leaves the current function: pops its stack frame, then either skips to the end of the function
// (no caller left) or hands control back to the calling OpFunctionCall.
case Op::Return:
case Op::ReturnValue:
{
// take the frame off the stack but keep the pointer, it is still needed below and is deleted at
// the end
StackFrame *exitingFrame = callstack.back();
callstack.pop_back();
if(callstack.empty())
{
// if there's no callstack there's no return address, jump to the function end
it++; // see what the next instruction is
// keep going until it's the end of the function
while(OpDecoder(it).op != Op::FunctionEnd)
{
nextInstruction++;
it++;
}
}
else
{
// a non-empty name on returnValue is what the FunctionCall handling checks to detect its second
// visit. For ReturnValue the whole variable (name included) is replaced by the returned value.
returnValue.name = "<return value>";
if(opdata.op == Op::ReturnValue)
{
OpReturnValue ret(it);
returnValue = GetSrc(ret.value);
}
// go back to the call instruction recorded when the function was entered
nextInstruction = exitingFrame->funcCallInstruction;
// process the outgoing and incoming scopes
ProcessScopeChange(live, callstack.back()->live);
// restore the live list from the calling frame
live = callstack.back()->live;
}
// reset every ID that was created inside the exiting frame back to an empty variable
for(Id id : exitingFrame->idsCreated)
ids[id] = ShaderVariable();
delete exitingFrame;
break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Atomic opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::ImageTexelPointer:
{
  // nothing is read or written yet. The image, coordinate and sample are just bundled into a
  // struct so that later texel reads/writes through this pointer can pick them up.
  OpImageTexelPointer ptr(it);

  ShaderVariable texelPtr;
  texelPtr.type = VarType::Struct;
  texelPtr.rows = 1;
  texelPtr.columns = 1;
  texelPtr.members = {ReadPointerValue(ptr.image), GetSrc(ptr.coordinate), GetSrc(ptr.sample)};
  texelPtr.members[0].name = "image";
  texelPtr.members[1].name = "coord";
  texelPtr.members[2].name = "sample";

  SetDst(opdata.result, texelPtr);
  break;
}
// Atomic load. The memory scope and semantics are decoded but not acted upon.
case Op::AtomicLoad:
{
OpAtomicLoad load(it);
// ignore for now
(void)load.memory;
(void)load.semantics;
const ShaderVariable &ptr = GetSrc(load.pointer);
ShaderVariable result;
// a pointer without members is read as an ordinary pointer. One with members is treated as an
// {image, coord, sample} texel pointer and read through the API wrapper instead.
if(ptr.members.empty())
{
result = ReadPointerValue(load.pointer);
}
else
{
const DataType &resultType = debugger.GetType(opdata.resultType);
result.rows = result.columns = 1;
result.type = resultType.scalar().Type();
if(!debugger.GetAPIWrapper()->ReadTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
uintComp(ptr.members[2], 0), result))
{
// read failed. Erase the result's value storage
RDCEraseEl(result.value);
}
}
SetDst(load.result, result);
break;
}
case Op::AtomicStore:
{
  OpAtomicStore store(it);

  // the memory scope and semantics are not modelled yet
  (void)store.memory;
  (void)store.semantics;

  const ShaderVariable &ptr = GetSrc(store.pointer);
  const ShaderVariable &value = GetSrc(store.value);

  // a pointer with members is an {image, coord, sample} texel pointer and is written through the
  // API wrapper; anything else is written as an ordinary pointer
  const bool isTexelPointer = !ptr.members.empty();

  if(isTexelPointer)
  {
    debugger.GetAPIWrapper()->WriteTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
                                         uintComp(ptr.members[2], 0), value);
  }
  else
  {
    WritePointerValue(store.pointer, value);
  }
  break;
}
// Atomic exchange: the result is the value previously stored, and the new value is then written
// in its place. The memory scope and semantics are decoded but not acted upon.
case Op::AtomicExchange:
{
OpAtomicExchange excg(it);
// ignore for now
(void)excg.memory;
(void)excg.semantics;
ShaderVariable result;
const ShaderVariable &ptr = GetSrc(excg.pointer);
const ShaderVariable &value = GetSrc(excg.value);
// a pointer without members is accessed as an ordinary pointer. One with members is treated as
// an {image, coord, sample} texel pointer and accessed through the API wrapper.
if(ptr.members.empty())
{
result = ReadPointerValue(excg.pointer);
WritePointerValue(excg.pointer, value);
}
else
{
const DataType &resultType = debugger.GetType(opdata.resultType);
result.rows = result.columns = 1;
result.type = resultType.scalar().Type();
if(!debugger.GetAPIWrapper()->ReadTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
uintComp(ptr.members[2], 0), result))
{
// read failed. Erase the result's value storage
RDCEraseEl(result.value);
}
// the write happens whether or not the read succeeded
debugger.GetAPIWrapper()->WriteTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
uintComp(ptr.members[2], 0), value);
}
SetDst(excg.result, result);
break;
}
// Atomic compare-exchange: the result is always the value previously stored. The new value is
// only written when that previous value equals the comparator.
case Op::AtomicCompareExchange:
{
OpAtomicCompareExchange cmpexcg(it);
// ignore for now
(void)cmpexcg.memory;
(void)cmpexcg.equal;
(void)cmpexcg.unequal;
ShaderVariable result;
const ShaderVariable &ptr = GetSrc(cmpexcg.pointer);
const ShaderVariable &value = GetSrc(cmpexcg.value);
const ShaderVariable &comparator = GetSrc(cmpexcg.comparator);
// a pointer without members is read as an ordinary pointer. One with members is treated as an
// {image, coord, sample} texel pointer and read through the API wrapper.
if(ptr.members.empty())
{
result = ReadPointerValue(cmpexcg.pointer);
}
else
{
const DataType &resultType = debugger.GetType(opdata.resultType);
result.rows = result.columns = 1;
result.type = resultType.scalar().Type();
if(!debugger.GetAPIWrapper()->ReadTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
uintComp(ptr.members[2], 0), result))
{
// read failed. Erase the result's value storage
RDCEraseEl(result.value);
}
}
SetDst(cmpexcg.result, result);
// widen both sides to 64 bits as unsigned values, each at its own variable's width, so a single
// comparison works for every integer type
uint64_t resultVal = 0, compareVal = 0;
#undef _IMPL
#define _IMPL(I, S, U) resultVal = comp<U>(result, 0);
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, result.type);
#undef _IMPL
#define _IMPL(I, S, U) compareVal = comp<U>(comparator, 0);
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, comparator.type);
// write the new value, only if the value is the same as expected.
if(resultVal == compareVal)
{
if(ptr.members.empty())
{
WritePointerValue(cmpexcg.pointer, value);
}
else
{
debugger.GetAPIWrapper()->WriteTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
uintComp(ptr.members[2], 0), value);
}
}
break;
}
// Atomic increment/decrement. Both opcodes are decoded through OpAtomicIIncrement; the result is
// the value before the update.
case Op::AtomicIIncrement:
case Op::AtomicIDecrement:
{
OpAtomicIIncrement atomic(it);
// ignore for now
(void)atomic.memory;
(void)atomic.semantics;
ShaderVariable result;
const ShaderVariable &ptr = GetSrc(atomic.pointer);
// a pointer without members is read as an ordinary pointer. One with members is treated as an
// {image, coord, sample} texel pointer and read through the API wrapper.
if(ptr.members.empty())
{
result = ReadPointerValue(atomic.pointer);
}
else
{
const DataType &resultType = debugger.GetType(opdata.resultType);
result.rows = result.columns = 1;
result.type = resultType.scalar().Type();
if(!debugger.GetAPIWrapper()->ReadTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
uintComp(ptr.members[2], 0), result))
{
// read failed. Erase the result's value storage
RDCEraseEl(result.value);
}
}
// store the destination before modifying result, so the instruction yields the original value
SetDst(atomic.result, result);
{
#undef _IMPL
#define _IMPL(I, S, U) \
if(opdata.op == Op::AtomicIIncrement) \
comp<I>(result, 0)++; \
else \
comp<I>(result, 0)--;
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, result.type);
}
// write the new value
if(ptr.members.empty())
{
WritePointerValue(atomic.pointer, result);
}
else
{
debugger.GetAPIWrapper()->WriteTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
uintComp(ptr.members[2], 0), result);
}
break;
}
case Op::AtomicFAddEXT:
case Op::AtomicFMinEXT:
case Op::AtomicFMaxEXT:
case Op::AtomicIAdd:
case Op::AtomicISub:
case Op::AtomicSMin:
case Op::AtomicUMin:
case Op::AtomicSMax:
case Op::AtomicUMax:
case Op::AtomicAnd:
case Op::AtomicOr:
case Op::AtomicXor:
{
  // Read-modify-write atomics taking one value operand. Every opcode in this group is decoded
  // through OpAtomicIAdd. The instruction's result is the value before the update; the updated
  // value is then written back to the same location.
  OpAtomicIAdd atomic(it);

  // ignore for now
  (void)atomic.memory;
  (void)atomic.semantics;

  ShaderVariable result;

  const ShaderVariable &ptr = GetSrc(atomic.pointer);
  const ShaderVariable &value = GetSrc(atomic.value);

  // a pointer without members is read as an ordinary pointer. One with members is treated as an
  // {image, coord, sample} texel pointer and read through the API wrapper.
  if(ptr.members.empty())
  {
    result = ReadPointerValue(atomic.pointer);
  }
  else
  {
    const DataType &resultType = debugger.GetType(opdata.resultType);

    result.rows = result.columns = 1;
    result.type = resultType.scalar().Type();

    if(!debugger.GetAPIWrapper()->ReadTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
                                            uintComp(ptr.members[2], 0), result))
    {
      // read failed. Erase the result's value storage
      RDCEraseEl(result.value);
    }
  }

  // store the destination before modifying result, so the instruction yields the original value
  SetDst(atomic.result, result);

  if(opdata.op == Op::AtomicIAdd)
  {
#undef _IMPL
#define _IMPL(I, S, U) comp<I>(result, 0) += comp<I>(value, 0)

    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicISub)
  {
#undef _IMPL
#define _IMPL(I, S, U) comp<I>(result, 0) -= comp<I>(value, 0)

    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicSMin)
  {
#undef _IMPL
#define _IMPL(I, S, U) comp<S>(result, 0) = RDCMIN(comp<S>(result, 0), comp<S>(value, 0))

    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicUMin)
  {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(result, 0) = RDCMIN(comp<U>(result, 0), comp<U>(value, 0))

    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicSMax)
  {
#undef _IMPL
#define _IMPL(I, S, U) comp<S>(result, 0) = RDCMAX(comp<S>(result, 0), comp<S>(value, 0))

    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicUMax)
  {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(result, 0) = RDCMAX(comp<U>(result, 0), comp<U>(value, 0))

    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicAnd)
  {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(result, 0) &= comp<U>(value, 0)

    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicOr)
  {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(result, 0) |= comp<U>(value, 0)

    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicXor)
  {
#undef _IMPL
#define _IMPL(I, S, U) comp<U>(result, 0) ^= comp<U>(value, 0)

    IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicFAddEXT)
  {
#undef _IMPL
#define _IMPL(T) comp<T>(result, 0) += comp<T>(value, 0)

    IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicFMaxEXT)
  {
    // the stored value is the larger of the old value and the operand. This must be a plain
    // assignment, like the integer min/max above - accumulating with += would store old + max.
#undef _IMPL
#define _IMPL(T) comp<T>(result, 0) = RDCMAX(comp<T>(result, 0), comp<T>(value, 0))

    IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, value.type);
  }
  else if(opdata.op == Op::AtomicFMinEXT)
  {
    // the stored value is the smaller of the old value and the operand, assigned not accumulated
#undef _IMPL
#define _IMPL(T) comp<T>(result, 0) = RDCMIN(comp<T>(result, 0), comp<T>(value, 0))

    IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, value.type);
  }

  // write the new value
  if(ptr.members.empty())
  {
    WritePointerValue(atomic.pointer, result);
  }
  else
  {
    debugger.GetAPIWrapper()->WriteTexel(ptr.members[0].GetBindIndex(), ptr.members[1],
                                         uintComp(ptr.members[2], 0), result);
  }
  break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Misc. opcodes
//
//////////////////////////////////////////////////////////////////////////////
case Op::Undef:
{
// this was processed as a constant, since it can appear in the constants section as well as
// in blocks. Just assign the value to itself so that it shows up as a change
OpUndef undef(it);
SetDst(undef.result, GetSrc(undef.result));
break;
}
case Op::Nop:
{
// nothing to do
break;
}
// TODO sparse sampling
case Op::ImageSparseSampleImplicitLod:
case Op::ImageSparseSampleExplicitLod:
case Op::ImageSparseSampleDrefImplicitLod:
case Op::ImageSparseSampleDrefExplicitLod:
case Op::ImageSparseSampleProjImplicitLod:
case Op::ImageSparseSampleProjExplicitLod:
case Op::ImageSparseSampleProjDrefImplicitLod:
case Op::ImageSparseSampleProjDrefExplicitLod:
case Op::ImageSparseFetch:
case Op::ImageSparseGather:
case Op::ImageSparseDrefGather:
case Op::ImageSparseTexelsResident:
case Op::ImageSparseRead:
{
RDCERR("Sparse opcodes not supported. SPIR-V should have been rejected by capability!");
ShaderVariable var("", 0U, 0U, 0U, 0U);
var.columns = 1;
SetDst(opdata.result, var);
break;
}
// TODO group ops
// None of the group/subgroup opcodes are implemented. Every opcode listed here logs an error
// and writes a zeroed single-column value to the result id.
case Op::GroupAll:
case Op::GroupAny:
case Op::GroupBroadcast:
case Op::GroupIAdd:
case Op::GroupFAdd:
case Op::GroupFMin:
case Op::GroupUMin:
case Op::GroupSMin:
case Op::GroupFMax:
case Op::GroupUMax:
case Op::GroupSMax:
case Op::GroupNonUniformElect:
case Op::GroupNonUniformAll:
case Op::GroupNonUniformAny:
case Op::GroupNonUniformAllEqual:
case Op::GroupNonUniformBroadcast:
case Op::GroupNonUniformBroadcastFirst:
case Op::GroupNonUniformBallot:
case Op::GroupNonUniformInverseBallot:
case Op::GroupNonUniformBallotBitExtract:
case Op::GroupNonUniformBallotBitCount:
case Op::GroupNonUniformBallotFindLSB:
case Op::GroupNonUniformBallotFindMSB:
case Op::GroupNonUniformShuffle:
case Op::GroupNonUniformShuffleXor:
case Op::GroupNonUniformShuffleUp:
case Op::GroupNonUniformShuffleDown:
case Op::GroupNonUniformIAdd:
case Op::GroupNonUniformFAdd:
case Op::GroupNonUniformIMul:
case Op::GroupNonUniformFMul:
case Op::GroupNonUniformSMin:
case Op::GroupNonUniformUMin:
case Op::GroupNonUniformFMin:
case Op::GroupNonUniformSMax:
case Op::GroupNonUniformUMax:
case Op::GroupNonUniformFMax:
case Op::GroupNonUniformBitwiseAnd:
// BitwiseOr belongs with its And/Xor siblings; without it the opcode would fall through the
// switch without reporting an error or setting its result.
case Op::GroupNonUniformBitwiseOr:
case Op::GroupNonUniformBitwiseXor:
case Op::GroupNonUniformLogicalAnd:
case Op::GroupNonUniformLogicalOr:
case Op::GroupNonUniformLogicalXor:
case Op::GroupNonUniformQuadBroadcast:
case Op::GroupNonUniformQuadSwap:
case Op::SubgroupBallotKHR:
case Op::SubgroupFirstInvocationKHR:
case Op::SubgroupAllKHR:
case Op::SubgroupAnyKHR:
case Op::SubgroupAllEqualKHR:
case Op::SubgroupReadInvocationKHR:
case Op::SDotKHR:
case Op::UDotKHR:
case Op::SUDotKHR:
case Op::SDotAccSatKHR:
case Op::UDotAccSatKHR:
case Op::SUDotAccSatKHR:
case Op::GroupIMulKHR:
case Op::GroupFMulKHR:
case Op::GroupBitwiseAndKHR:
case Op::GroupBitwiseOrKHR:
case Op::GroupBitwiseXorKHR:
case Op::GroupLogicalAndKHR:
case Op::GroupLogicalOrKHR:
case Op::GroupLogicalXorKHR:
case Op::GroupNonUniformRotateKHR:
{
RDCERR("Group opcodes not supported. SPIR-V should have been rejected by capability!");
// still give the result id a (zeroed) value
ShaderVariable var("", 0U, 0U, 0U, 0U);
var.columns = 1;
SetDst(opdata.result, var);
break;
}
case Op::PtrDiff:
{
RDCERR(
"Variable pointers are not supported, PtrDiff must only be used with variable pointers, "
"not physical pointers");
ShaderVariable var("", 0U, 0U, 0U, 0U);
var.columns = 1;
SetDst(opdata.result, var);
break;
}
case Op::EmitVertex:
case Op::EndPrimitive:
case Op::EmitStreamVertex:
case Op::EndStreamPrimitive:
{
// nothing to do for these, even if debugging geometry shaders?
break;
}
case Op::AssumeTrueKHR:
case Op::ExpectKHR:
{
// we can ignore these, they are optimisation hints
break;
}
case Op::GroupIAddNonUniformAMD:
case Op::GroupFAddNonUniformAMD:
case Op::GroupFMinNonUniformAMD:
case Op::GroupUMinNonUniformAMD:
case Op::GroupSMinNonUniformAMD:
case Op::GroupFMaxNonUniformAMD:
case Op::GroupUMaxNonUniformAMD:
case Op::GroupSMaxNonUniformAMD:
case Op::FragmentMaskFetchAMD:
case Op::FragmentFetchAMD:
case Op::ImageSampleFootprintNV:
case Op::GroupNonUniformPartitionNV:
case Op::WritePackedPrimitiveIndices4x8NV:
case Op::ReportIntersectionKHR:
case Op::IgnoreIntersectionNV:
case Op::TerminateRayNV:
case Op::TraceNV:
case Op::TypeAccelerationStructureKHR:
case Op::ExecuteCallableNV:
case Op::TypeCooperativeMatrixNV:
case Op::CooperativeMatrixLoadNV:
case Op::CooperativeMatrixStoreNV:
case Op::CooperativeMatrixMulAddNV:
case Op::CooperativeMatrixLengthNV:
case Op::BeginInvocationInterlockEXT:
case Op::EndInvocationInterlockEXT:
case Op::SubgroupShuffleINTEL:
case Op::SubgroupShuffleDownINTEL:
case Op::SubgroupShuffleUpINTEL:
case Op::SubgroupShuffleXorINTEL:
case Op::SubgroupBlockReadINTEL:
case Op::SubgroupBlockWriteINTEL:
case Op::SubgroupImageBlockReadINTEL:
case Op::SubgroupImageBlockWriteINTEL:
case Op::SubgroupImageMediaBlockReadINTEL:
case Op::SubgroupImageMediaBlockWriteINTEL:
case Op::UCountLeadingZerosINTEL:
case Op::UCountTrailingZerosINTEL:
case Op::AbsISubINTEL:
case Op::AbsUSubINTEL:
case Op::IAddSatINTEL:
case Op::UAddSatINTEL:
case Op::IAverageINTEL:
case Op::UAverageINTEL:
case Op::IAverageRoundedINTEL:
case Op::UAverageRoundedINTEL:
case Op::ISubSatINTEL:
case Op::USubSatINTEL:
case Op::IMul32x16INTEL:
case Op::UMul32x16INTEL:
case Op::LoopControlINTEL:
case Op::RayQueryGetRayTMinKHR:
case Op::RayQueryGetRayFlagsKHR:
case Op::RayQueryGetIntersectionTKHR:
case Op::RayQueryGetIntersectionInstanceCustomIndexKHR:
case Op::RayQueryGetIntersectionInstanceIdKHR:
case Op::RayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
case Op::RayQueryGetIntersectionGeometryIndexKHR:
case Op::RayQueryGetIntersectionPrimitiveIndexKHR:
case Op::RayQueryGetIntersectionBarycentricsKHR:
case Op::RayQueryGetIntersectionFrontFaceKHR:
case Op::RayQueryGetIntersectionCandidateAABBOpaqueKHR:
case Op::RayQueryGetIntersectionObjectRayDirectionKHR:
case Op::RayQueryGetIntersectionObjectRayOriginKHR:
case Op::RayQueryGetWorldRayDirectionKHR:
case Op::RayQueryGetWorldRayOriginKHR:
case Op::RayQueryGetIntersectionObjectToWorldKHR:
case Op::RayQueryGetIntersectionWorldToObjectKHR:
case Op::TypeRayQueryKHR:
case Op::RayQueryInitializeKHR:
case Op::RayQueryTerminateKHR:
case Op::RayQueryGenerateIntersectionKHR:
case Op::RayQueryConfirmIntersectionKHR:
case Op::RayQueryProceedKHR:
case Op::RayQueryGetIntersectionTypeKHR:
case Op::TraceRayKHR:
case Op::ExecuteCallableKHR:
case Op::ConvertUToAccelerationStructureKHR:
case Op::IgnoreIntersectionKHR:
case Op::TerminateRayKHR:
case Op::TraceMotionNV:
case Op::TraceRayMotionNV:
case Op::TypeBufferSurfaceINTEL:
case Op::TypeStructContinuedINTEL:
case Op::ConstantCompositeContinuedINTEL:
case Op::SpecConstantCompositeContinuedINTEL:
case Op::ConvertUToImageNV:
case Op::ConvertUToSamplerNV:
case Op::ConvertUToSampledImageNV:
case Op::ConvertImageToUNV:
case Op::ConvertSamplerToUNV:
case Op::ConvertSampledImageToUNV:
case Op::SamplerImageAddressingModeNV:
case Op::EmitMeshTasksEXT:
case Op::SetMeshOutputsEXT:
case Op::HitObjectRecordHitMotionNV:
case Op::HitObjectRecordHitWithIndexMotionNV:
case Op::HitObjectRecordMissMotionNV:
case Op::HitObjectGetWorldToObjectNV:
case Op::HitObjectGetObjectToWorldNV:
case Op::HitObjectGetObjectRayDirectionNV:
case Op::HitObjectGetObjectRayOriginNV:
case Op::HitObjectTraceRayMotionNV:
case Op::HitObjectGetShaderRecordBufferHandleNV:
case Op::HitObjectGetShaderBindingTableRecordIndexNV:
case Op::HitObjectRecordEmptyNV:
case Op::HitObjectTraceRayNV:
case Op::HitObjectRecordHitNV:
case Op::HitObjectRecordHitWithIndexNV:
case Op::HitObjectRecordMissNV:
case Op::HitObjectExecuteShaderNV:
case Op::HitObjectGetCurrentTimeNV:
case Op::HitObjectGetAttributesNV:
case Op::HitObjectGetHitKindNV:
case Op::HitObjectGetPrimitiveIndexNV:
case Op::HitObjectGetGeometryIndexNV:
case Op::HitObjectGetInstanceIdNV:
case Op::HitObjectGetInstanceCustomIndexNV:
case Op::HitObjectGetWorldRayDirectionNV:
case Op::HitObjectGetWorldRayOriginNV:
case Op::HitObjectGetRayTMaxNV:
case Op::HitObjectGetRayTMinNV:
case Op::HitObjectIsEmptyNV:
case Op::HitObjectIsHitNV:
case Op::HitObjectIsMissNV:
case Op::ReorderThreadWithHitObjectNV:
case Op::ReorderThreadWithHintNV:
case Op::TypeHitObjectNV:
case Op::ColorAttachmentReadEXT:
case Op::DepthAttachmentReadEXT:
case Op::StencilAttachmentReadEXT:
case Op::ImageSampleWeightedQCOM:
case Op::ImageBoxFilterQCOM:
case Op::ImageBlockMatchSADQCOM:
case Op::ImageBlockMatchSSDQCOM:
case Op::RayQueryGetIntersectionTriangleVertexPositionsKHR:
case Op::ConvertBF16ToFINTEL:
case Op::ConvertFToBF16INTEL:
case Op::TypeCooperativeMatrixKHR:
case Op::CooperativeMatrixLoadKHR:
case Op::CooperativeMatrixStoreKHR:
case Op::CooperativeMatrixMulAddKHR:
case Op::CooperativeMatrixLengthKHR:
case Op::ImageBlockMatchWindowSSDQCOM:
case Op::ImageBlockMatchWindowSADQCOM:
case Op::ImageBlockMatchGatherSSDQCOM:
case Op::ImageBlockMatchGatherSADQCOM:
case Op::FinalizeNodePayloadsAMDX:
case Op::FinishWritingNodePayloadAMDX:
case Op::InitializeNodePayloadsAMDX:
case Op::GroupNonUniformQuadAllKHR:
case Op::GroupNonUniformQuadAnyKHR:
case Op::FetchMicroTriangleVertexBarycentricNV:
case Op::FetchMicroTriangleVertexPositionNV:
case Op::CompositeConstructContinuedINTEL:
case Op::MaskedGatherINTEL:
case Op::MaskedScatterINTEL:
{
RDCERR("Unsupported extension opcode used %s", ToStr(opdata.op).c_str());
ShaderVariable var("", 0U, 0U, 0U, 0U);
var.columns = 1;
SetDst(opdata.result, var);
break;
}
case Op::SourceContinued:
case Op::Source:
case Op::SourceExtension:
case Op::Name:
case Op::MemberName:
case Op::String:
case Op::Extension:
case Op::ExtInstImport:
case Op::MemoryModel:
case Op::EntryPoint:
case Op::ExecutionMode:
case Op::Capability:
case Op::TypeVoid:
case Op::TypeBool:
case Op::TypeInt:
case Op::TypeFloat:
case Op::TypeVector:
case Op::TypeMatrix:
case Op::TypeImage:
case Op::TypeSampler:
case Op::TypeSampledImage:
case Op::TypeArray:
case Op::TypeRuntimeArray:
case Op::TypeStruct:
case Op::TypeOpaque:
case Op::TypePointer:
case Op::TypeFunction:
case Op::TypeEvent:
case Op::TypeDeviceEvent:
case Op::TypeReserveId:
case Op::TypeQueue:
case Op::TypePipe:
case Op::TypeForwardPointer:
case Op::ConstantTrue:
case Op::ConstantFalse:
case Op::Constant:
case Op::ConstantComposite:
case Op::ConstantSampler:
case Op::ConstantNull:
case Op::SpecConstantTrue:
case Op::SpecConstantFalse:
case Op::SpecConstant:
case Op::SpecConstantComposite:
case Op::SpecConstantOp:
case Op::Decorate:
case Op::MemberDecorate:
case Op::DecorationGroup:
case Op::GroupDecorate:
case Op::GroupMemberDecorate:
case Op::DecorateString:
case Op::MemberDecorateString:
case Op::DecorateId:
case Op::ModuleProcessed:
case Op::ExecutionModeId:
{
RDCERR("Encountered unexpected global SPIR-V operation %s", ToStr(opdata.op).c_str());
break;
}
case Op::GenericPtrMemSemantics:
case Op::ImageQueryFormat:
case Op::ImageQueryOrder:
case Op::SatConvertSToU:
case Op::SatConvertUToS:
case Op::PtrCastToGeneric:
case Op::GenericCastToPtr:
case Op::GenericCastToPtrExplicit:
case Op::SizeOf:
case Op::CopyMemorySized:
case Op::IsFinite:
case Op::IsNormal:
case Op::SignBitSet:
case Op::LessOrGreater:
case Op::Ordered:
case Op::Unordered:
case Op::LifetimeStart:
case Op::LifetimeStop:
case Op::AtomicCompareExchangeWeak:
case Op::AtomicFlagTestAndSet:
case Op::AtomicFlagClear:
case Op::GroupAsyncCopy:
case Op::GroupWaitEvents:
case Op::GetKernelLocalSizeForSubgroupCount:
case Op::GetKernelMaxNumSubgroups:
case Op::EnqueueMarker:
case Op::EnqueueKernel:
case Op::GetKernelNDrangeSubGroupCount:
case Op::GetKernelNDrangeMaxSubGroupSize:
case Op::GetKernelWorkGroupSize:
case Op::GetKernelPreferredWorkGroupSizeMultiple:
case Op::RetainEvent:
case Op::ReleaseEvent:
case Op::CreateUserEvent:
case Op::IsValidEvent:
case Op::SetUserEventStatus:
case Op::CaptureEventProfilingInfo:
case Op::GetDefaultQueue:
case Op::BuildNDRange:
case Op::TypeNamedBarrier:
case Op::NamedBarrierInitialize:
case Op::MemoryNamedBarrier:
case Op::ReadPipe:
case Op::WritePipe:
case Op::ReservedReadPipe:
case Op::ReservedWritePipe:
case Op::ReserveReadPipePackets:
case Op::ReserveWritePipePackets:
case Op::CommitReadPipe:
case Op::CommitWritePipe:
case Op::IsValidReserveId:
case Op::GetNumPipePackets:
case Op::GetMaxPipePackets:
case Op::GroupReserveReadPipePackets:
case Op::GroupReserveWritePipePackets:
case Op::GroupCommitReadPipe:
case Op::GroupCommitWritePipe:
case Op::TypePipeStorage:
case Op::ConstantPipeStorage:
case Op::CreatePipeFromPipeStorage:
case Op::FPGARegINTEL:
case Op::ReadPipeBlockingINTEL:
case Op::WritePipeBlockingINTEL:
case Op::ControlBarrierArriveINTEL:
case Op::ControlBarrierWaitINTEL:
{
// these are kernel only
RDCERR("Encountered unexpected kernel SPIR-V operation %s", ToStr(opdata.op).c_str());
break;
}
case Op::Line:
case Op::NoLine:
case Op::Function:
case Op::FunctionParameter:
case Op::FunctionEnd:
case Op::Variable:
{
// these should be handled elsewhere specially
RDCERR("Encountered SPIR-V operation %s in general dispatch loop", ToStr(opdata.op).c_str());
break;
}
case Op::Max: RDCWARN("Unhandled SPIR-V operation %s", ToStr(opdata.op).c_str()); break;
}
// skip over any degenerate branches, i.e. an unconditional branch whose target is the label
// that immediately follows it (ignoring line markers). This is only done when the debugger
// reports that there is no debug info.
while(!debugger.HasDebugInfo())
{
it = debugger.GetIterForInstruction(nextInstruction);
if(it.opcode() == Op::Branch)
{
Id target = OpBranch(it).targetLabel;
// step past the branch and any line markers to reach the instruction that follows
it++;
while(it.opcode() == Op::Line || it.opcode() == Op::NoLine)
it++;
// only decode the following instruction as a label if it really is one. A branch may be the
// last instruction in a function, in which case what follows is not an OpLabel and decoding
// it as one would read an unrelated word.
if(it.opcode() == Op::Label && target == OpLabel(it).result)
{
JumpToLabel(target);
continue;
}
}
break;
}
SkipIgnoredInstructions();
// set the state's next instruction (if we have one) to ours, bounded by how many
// instructions there are
if(m_State)
m_State->nextInstruction = RDCMIN(nextInstruction, debugger.GetNumInstructions() - 1);
m_State = NULL;
}