source/backend/opengl/glsl/kernel2image.glsl (20 lines of code) (raw):

layout(std430) buffer; layout(FORMAT, binding=0) writeonly uniform PRECISION image2D uOutput; layout(binding=2) readonly buffer kernel{ vec4 data[]; } uKernel; layout(location = 3) uniform int width; layout(location = 4) uniform int height; //index : ky * kx, oc/4, ic/4 //kernel buffer : oc ic h w -> oc/4 ic/4 ky kx ic4 oc4 //kernel image : oc/4, ky * kx * ic/4 * ic4 layout (local_size_x = 4, local_size_y = 4, local_size_z = 1) in; void main() { ivec3 pos = ivec3(gl_GlobalInvocationID); if (pos.x < width && pos.y < height) { vec4 res = uKernel.data[pos.x+pos.y*width]; imageStore(uOutput, ivec2(pos.x, pos.y), res); } }