source/backend/opengl/glsl/permute.glsl (28 lines of code) (raw):

// Permute compute shader.
//
// Copies every element of uInput into uOutput with its three innermost axes
// reordered according to `dims`. Both buffers are indexed as flat float arrays
// in NCHW order. Each invocation handles exactly one output element, located
// at (x, y, z) = gl_GlobalInvocationID.
//
// XLOCAL / YLOCAL / ZLOCAL are not defined in this file; presumably the host
// injects them (together with the #version line) when compiling the shader.

// Source tensor, read-only.
layout(binding = 0) readonly buffer srcBuffer{
    float data[];
}uInput;

// Destination tensor, write-only.
layout(binding = 1) writeonly buffer dstBuffer{
    float data[];
}uOutput;

// Axis mapping. dims.y / dims.z / dims.w select which input axis
// (1 = plane, 2 = row, 3 = column) is driven by the output z / y / x
// coordinate respectively. dims.x is not used by this shader.
layout(location=2) uniform ivec4 dims;

// Input extents: .x is the row length, .x * .y is the plane size.
layout(location=3) uniform ivec4 inImSize;

// Output extents: .x is the row length, .x * .y is the plane size.
// NOTE(review): .z is assumed to be the number of output planes (the Z
// extent of the dispatch) - confirm against the host code that sets it.
layout(location=4) uniform ivec4 outImSize;

layout (local_size_x = XLOCAL, local_size_y = YLOCAL, local_size_z = ZLOCAL) in;

void main()
{
    ivec3 pos = ivec3(gl_GlobalInvocationID);
    ivec3 inImgSize = ivec3(inImSize.xyz);
    ivec3 outImgSize = ivec3(outImSize.xyz);

    // input, output all are NCHW layout
    ivec4 dimParam = dims.xyzw;

    // Invocations that fall outside the output volume must do nothing.
    // The Z test matters as much as X and Y: pos.z feeds both buffer indices,
    // so an unguarded surplus invocation would access memory past the end of
    // the buffers.
    if(pos.x < outImgSize.x && pos.y < outImgSize.y && pos.z < outImgSize.z)
    {
        // Zero-initialise so that a `dims` value that is not a permutation of
        // {1, 2, 3} yields a deterministic index instead of an indeterminate one.
        int dimIndex[4] = int[4](0, 0, 0, 0);

        // Scatter the output coordinate into input-axis order.
        dimIndex[dimParam.y] = pos.z;
        dimIndex[dimParam.z] = pos.y;
        dimIndex[dimParam.w] = pos.x;

        // Flatten (plane, row, column) for each tensor.
        int inputIndex = dimIndex[1] * inImgSize.x * inImgSize.y
                       + dimIndex[2] * inImgSize.x
                       + dimIndex[3];
        int outputIndex = pos.x
                        + pos.y * outImgSize.x
                        + pos.z * outImgSize.x * outImgSize.y;

        uOutput.data[outputIndex] = uInput.data[inputIndex];
    }
}