source/backend/opengl/glsl/col2im.glsl (33 lines of code) (raw):

// col2im: copies a 2D temporary texture into the 3D output image,
// adding the bias and applying the optional RELU / RELU6 activation.
//
// FORMAT, XLOCAL, YLOCAL and ZLOCAL are not defined in this file; they are
// expected to be supplied as preprocessor definitions when the shader is built.
layout(std430) buffer;
layout(binding=0, FORMAT) writeonly uniform mediump image3D uOutput;
layout(location=1) uniform mediump sampler2D uInput;
layout(binding=2) readonly buffer bias{
    vec4 data[];
} uBias;
layout(location=3) uniform ivec3 outputSize;

layout (local_size_x = XLOCAL, local_size_y = YLOCAL, local_size_z = ZLOCAL) in;

#define UP_DIV(x, y) (((x)+(y)-1)/(y))

// Layouts, as used by the indexing below:
//   invocation index : x = ow, y = oh, z = ob * (oc/4) + oc_4
//   outputSize       : x = ow, y = oh, z = oc/4
//   input temp image : x = (ob*oh*ow) / 4, y = oc_4 * 4 + (ob*oh*ow) % 4
void main()
{
    ivec3 pos = ivec3(gl_GlobalInvocationID);

    // pos.z packs the batch index and the channel-block index together.
    int ob = pos.z / outputSize.z;
    int oc_4 = pos.z % outputSize.z;

    // Invocations outside the output plane (work-group padding) do nothing.
    if (all(lessThan(pos.xy, outputSize.xy)))
    {
        // Linear position of this pixel across batch, row and column.
        int sourceXIndex = ob*outputSize.x*outputSize.y + pos.y*outputSize.x + pos.x;
        // Four consecutive pixels share one texture column and occupy four
        // consecutive rows inside the channel block's group of rows.
        int sourceX = sourceXIndex / 4;
        int sourceY = oc_4 * 4 + sourceXIndex % 4;

        // Index the bias by channel block, not by pos.z: pos.z also carries the
        // batch offset, so for ob > 0 it would read past the per-channel entries.
        // NOTE(review): assumes uBias holds one vec4 per channel block
        // (outputSize.z entries) - confirm against the host-side allocation.
        vec4 color = uBias.data[oc_4];
        color += texelFetch(uInput, ivec2(sourceX, sourceY), 0);
#ifdef RELU
        color = max(color, vec4(0));
#endif
#ifdef RELU6
        color = clamp(color, vec4(0), vec4(6));
#endif
        imageStore(uOutput, pos, color);
    }
}