// source/backend/metal/render/AllRenderShader.cpp (1,356 lines of code) (raw):

#include "AllRenderShader.hpp"

// Metal compute-shader sources embedded as C string constants, one
// `render_shader_*_metal` symbol per kernel. Every kernel entry point is
// named `main0`.
//
// Layout: adjacent string literals are concatenated by the compiler, so each
// shader source line is kept as its own literal ending in "\n". The literal
// text IS the shader program; do not reformat the inside of the quotes.
//
// Several shaders use identifiers that their own text never defines
// (LOCAL_SIZE, UNIT, BIN_NUMBER, TYPE). Presumably these are injected as
// preprocessor macros when the shader is compiled -- TODO confirm against the
// code that builds the Metal library.
//
// NOTE(review): the `spvUnsafeArray` helper and the SPIRV_CROSS_CONSTANT_ID_*
// macros suggest the shader text is SPIRV-Cross output; if so, fixes belong in
// the shader sources it was generated from rather than in this file.

// float -> int conversion.
// Thread `pos` (x component of the grid position), when pos < size.x, writes
// uOutput[pos] = int(uInput[pos] * scale.x + scale.y).
const char* render_shader_float2int_metal =
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 size;\n"
" float4 scale;\n"
"};\n"
"struct destBuffer\n"
"{\n"
" int data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device destBuffer& uOutput [[buffer(0)]],const device sourceBuffer0& uInput [[buffer(1)]],constant constBuffer& uConstant [[buffer(2)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 size=uConstant.size;\n"
" if (pos<size.x)\n"
" {\n"
" uOutput.data[pos]=int((uInput.data[pos]*uConstant.scale.x)+uConstant.scale.y);\n"
" }\n"
"}\n"
;

// Scatter pass matching the bilinear 2D sampler below (by name, its gradient).
// Note the buffer roles: `uOutput` is read-only here and `uInput` (an int
// buffer) is the one written.
// For each element of outShape the kernel reads a value V from uOutput and a
// (gridX, gridY) pair from uGrid, derives the four neighbouring texel
// positions and their bilinear weights, and atomically adds
// int(weight * V * 16777216.0) (i.e. scaled by 2^24) into uInput at each
// neighbour. WriteSample clamps the texel position to the input width/height
// before the atomic add.
const char* render_shader_texture2dgrad_metal =
"using namespace metal;\n"
"struct gridSampleBuffer\n"
"{\n"
" int4 inShape;\n"
" int4 outShape;\n"
" uint alignCorners;\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" int data[1];\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"static inline __attribute__((always_inline))\n"
"void WriteSample(thread int& positionX,thread int& positionY,thread const int& c,thread const int& n,thread const float& V_f,constant gridSampleBuffer& uGridSampleParam,device sourceBuffer0& uInput)\n"
"{\n"
" int V=int(V_f*16777216.0);\n"
" int width=uGridSampleParam.inShape.x;\n"
" int height=uGridSampleParam.inShape.y;\n"
" positionX=clamp(positionX,0,width-1);\n"
" positionY=clamp(positionY,0,height-1);\n"
" int _77=atomic_fetch_add_explicit((device atomic_int*)&uInput.data[(((0+(positionX*uGridSampleParam.inShape.z))+((positionY*width)*uGridSampleParam.inShape.z))+(((n*width)*height)*uGridSampleParam.inShape.z))+c],V,memory_order_relaxed);\n"
"}\n"
"kernel void main0(const device destBuffer& uOutput [[buffer(0)]],device sourceBuffer0& uInput [[buffer(1)]],const device sourceBuffer1& uGrid [[buffer(2)]],constant gridSampleBuffer& uGridSampleParam [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 inputShape=uGridSampleParam.inShape;\n"
" int4 outputShape=uGridSampleParam.outShape;\n"
" int total=((outputShape.x*outputShape.y)*outputShape.z)*outputShape.w;\n"
" if (pos<total)\n"
" {\n"
" int x=pos % outputShape.x;\n"
" int tmp=pos/outputShape.x;\n"
" int y=tmp % outputShape.y;\n"
" tmp /= outputShape.y;\n"
" int z=tmp % outputShape.z;\n"
" int n=tmp/outputShape.z;\n"
" int gridPosition=(((n*outputShape.x)*outputShape.y)+(y*outputShape.x))+x;\n"
" float gridX=uGrid.data[(inputShape.w*gridPosition)+0];\n"
" float gridY=uGrid.data[(inputShape.w*gridPosition)+1];\n"
" float V=uOutput.data[(((0+(x*outputShape.z))+((y*outputShape.x)*outputShape.z))+z)+(((n*outputShape.x)*outputShape.y)*outputShape.z)];\n"
" float cordH=(gridY*float(inputShape.y))-0.5;\n"
" float cordW=(gridX*float(inputShape.x))-0.5;\n"
" int w0_h=int(floor(cordH));\n"
" int w0_w=int(floor(cordW));\n"
" int w1_h=w0_h+1;\n"
" int w1_w=w0_w+1;\n"
" float f0=float(w1_w)-cordW;\n"
" float f1=1.0-f0;\n"
" float h0=float(w1_h)-cordH;\n"
" float h1=1.0-h0;\n"
" float f00=(f0*h0)*V;\n"
" float f01=(f1*h0)*V;\n"
" float f10=(f0*h1)*V;\n"
" float f11=(f1*h1)*V;\n"
" int param=w0_w;\n"
" int param_1=w0_h;\n"
" int param_2=z;\n"
" int param_3=n;\n"
" float param_4=f00;\n"
" WriteSample(param,param_1,param_2,param_3,param_4,uGridSampleParam,uInput);\n"
" int param_5=w1_w;\n"
" int param_6=w0_h;\n"
" int param_7=z;\n"
" int param_8=n;\n"
" float param_9=f01;\n"
" WriteSample(param_5,param_6,param_7,param_8,param_9,uGridSampleParam,uInput);\n"
" int param_10=w0_w;\n"
" int param_11=w1_h;\n"
" int param_12=z;\n"
" int param_13=n;\n"
" float param_14=f10;\n"
" WriteSample(param_10,param_11,param_12,param_13,param_14,uGridSampleParam,uInput);\n"
" int param_15=w1_w;\n"
" int param_16=w1_h;\n"
" int param_17=z;\n"
" int param_18=n;\n"
" float param_19=f11;\n"
" WriteSample(param_15,param_16,param_17,param_18,param_19,uGridSampleParam,uInput);\n"
" }\n"
"}\n"
;

// Builds (key, index) pairs for the points that pass a depth test.
// Each thread owns a contiguous slice [sta, fin) of the uConstant.point.x
// points. For every point it computes depth = (attr . z-components of
// uViewProj rows 0..3) / (attr . w-components), and if 0 <= depth <= 1 it
// stores uint2(uint(depth * 32767.0), pos) at a running `offset`.
// `offset` starts at uHistogram[threadId - 1] (0 for thread 0), so the
// histogram presumably already holds running totals of the per-thread counts
// produced by render_shader_rastersort_count_valid_number_metal -- TODO
// confirm. Thread 0 additionally writes a padding pair (32767, 0) at index
// sortSize when sortSize (= uHistogram[threadNumber - 1]) is odd.
// Requires LOCAL_SIZE and TYPE to be defined externally.
const char* render_shader_rastersort_collect_key_metal =
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" uint4 point;\n"
"};\n"
"struct histogram\n"
"{\n"
" uint data[1];\n"
"};\n"
"struct pointkeys\n"
"{\n"
" uint2 data[1];\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float4 data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" TYPE data[1];\n"
"};\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_0\n"
"#define SPIRV_CROSS_CONSTANT_ID_0 1u\n"
"#endif\n"
"constant uint _210=SPIRV_CROSS_CONSTANT_ID_0;\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_1\n"
"#define SPIRV_CROSS_CONSTANT_ID_1 1u\n"
"#endif\n"
"constant uint _211=SPIRV_CROSS_CONSTANT_ID_1;\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_2\n"
"#define SPIRV_CROSS_CONSTANT_ID_2 1u\n"
"#endif\n"
"constant uint _212=SPIRV_CROSS_CONSTANT_ID_2;\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(_210,_211,_212);\n"
"kernel void main0(device pointkeys& uPointKeys [[buffer(0)]],const device sourceBuffer0& uAttr [[buffer(1)]],const device sourceBuffer1& uViewProj [[buffer(2)]],const device histogram& uHistogram [[buffer(3)]],constant constBuffer& uConstant [[buffer(4)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
" uint threadNumber=groupSize*LOCAL_SIZE;\n"
" uint totalSize=uConstant.point.x;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
" uint offset=0u;\n"
" if (threadId>0u)\n"
" {\n"
" offset=uHistogram.data[threadId-1u];\n"
" }\n"
" uint sortSize=uHistogram.data[threadNumber-1u];\n"
" if (threadId == 0u)\n"
" {\n"
" if ((sortSize % 2u)>0u)\n"
" {\n"
" ((device uint*)&uPointKeys.data[sortSize])[0u]=32767u;\n"
" ((device uint*)&uPointKeys.data[sortSize])[1u]=0u;\n"
" }\n"
" }\n"
" uint2 kv;\n"
" for (uint pos=sta; pos<fin; pos++)\n"
" {\n"
" float4 vp0=uViewProj.data[0];\n"
" float4 vp1=uViewProj.data[1];\n"
" float4 vp2=uViewProj.data[2];\n"
" float4 vp3=uViewProj.data[3];\n"
" float4 attr=float4(uAttr.data[pos]);\n"
" float depth=(((attr.x*vp0.z)+(attr.y*vp1.z))+(attr.z*vp2.z))+vp3.z;\n"
" float dw=(((attr.x*vp0.w)+(attr.y*vp1.w))+(attr.z*vp2.w))+vp3.w;\n"
" depth /= dw;\n"
" if ((depth >= 0.0) && (depth <= 1.0))\n"
" {\n"
" kv.x=uint(depth*32767.0);\n"
" kv.y=pos;\n"
" uPointKeys.data[offset]=kv;\n"
" offset++;\n"
" }\n"
" }\n"
"}\n"
;

// Per-thread digit histogram with a fixed 256 bins and 256 threads per group.
// Each thread walks its slice of uint4 elements (element count is
// (uOffset.off.x + 1) / 2), takes the .x and .z components as two keys,
// extracts digit = (key / 256^pass) % 256 with pass = uPass.off.x, and counts
// it in a private 256-entry array. Counts are stored bin-major:
// uHistogram[bin * threadNumber + threadId].
// NOTE(review): .x/.z presumably are the key halves of two packed
// (key, value) uint2 pairs -- confirm against the buffer layout.
const char* render_shader_radixsort_histogram_metal =
"using namespace metal;\n"
"template<typename T,size_t Num>\n"
"struct spvUnsafeArray\n"
"{\n"
" T elements[Num ? Num : 1];\n"
" \n"
" thread T& operator [] (size_t pos) thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const thread T& operator [] (size_t pos) const thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" device T& operator [] (size_t pos) device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const device T& operator [] (size_t pos) const device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" constexpr const constant T& operator [] (size_t pos) const constant\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" threadgroup T& operator [] (size_t pos) threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const threadgroup T& operator [] (size_t pos) const threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
"};\n"
"struct variableBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct variablepBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct pointO\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"struct pointI\n"
"{\n"
" uint data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device pointI& uHistogram [[buffer(0)]],const device pointO& uPointKeysInput [[buffer(1)]],constant variableBuffer& uOffset [[buffer(2)]],constant variablepBuffer& uPass [[buffer(3)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
" spvUnsafeArray<uint,256> binSize;\n"
" for (int i=0; i<256; i++)\n"
" {\n"
" binSize[i]=0u;\n"
" }\n"
" uint totalSize=(uOffset.off.x+1u)/2u;\n"
" uint threadNumber=groupSize*256u;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
" uint pass=uPass.off.x;\n"
" uint div=1u;\n"
" for (uint i_1=0u; i_1<pass; i_1++)\n"
" {\n"
" div *= 256u;\n"
" }\n"
" for (uint i_2=sta; i_2<fin; i_2++)\n"
" {\n"
" uint2 key=uPointKeysInput.data[i_2].xz/uint2(div);\n"
" key %= uint2(256u);\n"
" binSize[key.x]++;\n"
" binSize[key.y]++;\n"
" }\n"
" for (int i_3=0; i_3<256; i_3++)\n"
" {\n"
" uHistogram.data[(uint(i_3)*threadNumber)+threadId]=binSize[i_3];\n"
" }\n"
"}\n"
;

// Bilinear cube-map sampler.
// For each element of outShape the kernel reads a direction (u, v, w) from
// uGrid, lets indexCubeMap pick the face with the largest absolute component
// (faces 0..5) and face-local coordinates (s, t) in [0, 1], then bilinearly
// blends four clamped texels from image n = on * 6 + face. If indexCubeMap
// reports face -1 (major-axis magnitude is 0) the output is 0.
// NOTE(review): sc/tc/ma in indexCubeMap are only assigned inside the three
// face branches; if no comparison holds (e.g. NaN input) they are read
// uninitialized.
const char* render_shader_texturecube_metal =
"using namespace metal;\n"
"struct gridSampleBuffer\n"
"{\n"
" int4 inShape;\n"
" int4 outShape;\n"
" uint alignCorners;\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"static inline __attribute__((always_inline))\n"
"void indexCubeMap(thread const float3& d,thread int& face,thread float& s,thread float& t)\n"
"{\n"
" float3 absd;\n"
" absd.x=abs(d.x);\n"
" absd.y=abs(d.y);\n"
" absd.z=abs(d.z);\n"
" face=-1;\n"
" bool _49=absd.x >= absd.y;\n"
" bool _57;\n"
" if (_49)\n"
" {\n"
" _57=absd.x >= absd.z;\n"
" }\n"
" else\n"
" {\n"
" _57=_49;\n"
" }\n"
" float sc;\n"
" float tc;\n"
" float ma;\n"
" if (_57)\n"
" {\n"
" if (d.x>0.0)\n"
" {\n"
" face=0;\n"
" sc=-d.z;\n"
" tc=-d.y;\n"
" ma=absd.x;\n"
" }\n"
" else\n"
" {\n"
" face=1;\n"
" sc=d.z;\n"
" tc=-d.y;\n"
" ma=absd.x;\n"
" }\n"
" }\n"
" bool _91=absd.y >= absd.x;\n"
" bool _99;\n"
" if (_91)\n"
" {\n"
" _99=absd.y >= absd.z;\n"
" }\n"
" else\n"
" {\n"
" _99=_91;\n"
" }\n"
" if (_99)\n"
" {\n"
" if (d.y>0.0)\n"
" {\n"
" face=2;\n"
" sc=d.x;\n"
" tc=d.z;\n"
" ma=absd.y;\n"
" }\n"
" else\n"
" {\n"
" face=3;\n"
" sc=d.x;\n"
" tc=-d.z;\n"
" ma=absd.y;\n"
" }\n"
" }\n"
" bool _127=absd.z >= absd.x;\n"
" bool _135;\n"
" if (_127)\n"
" {\n"
" _135=absd.z >= absd.y;\n"
" }\n"
" else\n"
" {\n"
" _135=_127;\n"
" }\n"
" if (_135)\n"
" {\n"
" if (d.z>0.0)\n"
" {\n"
" face=4;\n"
" sc=d.x;\n"
" tc=-d.y;\n"
" ma=absd.z;\n"
" }\n"
" else\n"
" {\n"
" face=5;\n"
" sc=-d.x;\n"
" tc=-d.y;\n"
" ma=absd.z;\n"
" }\n"
" }\n"
" if (ma == 0.0)\n"
" {\n"
" s=0.0;\n"
" t=0.0;\n"
" face=-1;\n"
" }\n"
" else\n"
" {\n"
" s=((sc/ma)+1.0)*0.5;\n"
" t=((tc/ma)+1.0)*0.5;\n"
" }\n"
"}\n"
"static inline __attribute__((always_inline))\n"
"float LoadSample(thread int& positionX,thread int& positionY,thread const int& c,thread const int& n,constant gridSampleBuffer& uGridSampleParam,const device sourceBuffer0& uInput)\n"
"{\n"
" int width=uGridSampleParam.inShape.x;\n"
" int height=uGridSampleParam.inShape.y;\n"
" positionX=clamp(positionX,0,width-1);\n"
" positionY=clamp(positionY,0,height-1);\n"
" float V=uInput.data[(((0+(positionX*uGridSampleParam.inShape.z))+((positionY*width)*uGridSampleParam.inShape.z))+(((n*width)*height)*uGridSampleParam.inShape.z))+c];\n"
" return V;\n"
"}\n"
"kernel void main0(device destBuffer& uOutput [[buffer(0)]],const device sourceBuffer0& uInput [[buffer(1)]],const device sourceBuffer1& uGrid [[buffer(2)]],constant gridSampleBuffer& uGridSampleParam [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 inputShape=uGridSampleParam.inShape;\n"
" int4 outputShape=uGridSampleParam.outShape;\n"
" int total=((outputShape.x*outputShape.y)*outputShape.z)*outputShape.w;\n"
" if (pos<total)\n"
" {\n"
" int x=pos % outputShape.x;\n"
" int tmp=pos/outputShape.x;\n"
" int y=tmp % outputShape.y;\n"
" tmp /= outputShape.y;\n"
" int z=tmp % outputShape.z;\n"
" int on=tmp/outputShape.z;\n"
" int gridPosition=(((on*outputShape.x)*outputShape.y)+(y*outputShape.x))+x;\n"
" float u=uGrid.data[(inputShape.w*gridPosition)+0];\n"
" float v=uGrid.data[(inputShape.w*gridPosition)+1];\n"
" float w=uGrid.data[(inputShape.w*gridPosition)+2];\n"
" float3 param=float3(u,v,w);\n"
" int param_1;\n"
" float param_2;\n"
" float param_3;\n"
" indexCubeMap(param,param_1,param_2,param_3);\n"
" int face=param_1;\n"
" float gridX=param_2;\n"
" float gridY=param_3;\n"
" float V=0.0;\n"
" if (face >= 0)\n"
" {\n"
" int n=(on*6)+face;\n"
" float cordH=(gridY*float(inputShape.y))-0.5;\n"
" float cordW=(gridX*float(inputShape.x))-0.5;\n"
" int w0_h=int(floor(cordH));\n"
" int w0_w=int(floor(cordW));\n"
" int w1_h=w0_h+1;\n"
" int w1_w=w0_w+1;\n"
" float oneV=1.0;\n"
" int param_4=w0_w;\n"
" int param_5=w0_h;\n"
" int param_6=z;\n"
" int param_7=n;\n"
" float _401=LoadSample(param_4,param_5,param_6,param_7,uGridSampleParam,uInput);\n"
" float i00=_401;\n"
" int param_8=w1_w;\n"
" int param_9=w0_h;\n"
" int param_10=z;\n"
" int param_11=n;\n"
" float _411=LoadSample(param_8,param_9,param_10,param_11,uGridSampleParam,uInput);\n"
" float i01=_411;\n"
" int param_12=w0_w;\n"
" int param_13=w1_h;\n"
" int param_14=z;\n"
" int param_15=n;\n"
" float _421=LoadSample(param_12,param_13,param_14,param_15,uGridSampleParam,uInput);\n"
" float i10=_421;\n"
" int param_16=w1_w;\n"
" int param_17=w1_h;\n"
" int param_18=z;\n"
" int param_19=n;\n"
" float _431=LoadSample(param_16,param_17,param_18,param_19,uGridSampleParam,uInput);\n"
" float i11=_431;\n"
" float f0=float(w1_w)-cordW;\n"
" float f1=oneV-f0;\n"
" float h0=float(w1_h)-cordH;\n"
" float h1=oneV-h0;\n"
" float i0=(i00*f0)+(i01*f1);\n"
" float i1=(i10*f0)+(i11*f1);\n"
" V=(i0*h0)+(i1*h1);\n"
" }\n"
" uOutput.data[(((0+(x*outputShape.z))+((y*outputShape.x)*outputShape.z))+z)+(((on*outputShape.x)*outputShape.y)*outputShape.z)]=V;\n"
" }\n"
"}\n"
;

// Running (inclusive) sum over a buffer of uints packed four per uint4,
// parameterised by the external macros LOCAL_SIZE (threads) and UNIT (uint4
// elements per thread per round).
// Each round: every thread scans its own UNIT-element chunk into
// threadBuffer, publishes the chunk total in threadgroup memory `local_sum`,
// two stride loops (doubling, then halving) combine those totals, and the
// thread adds `sum + local_sum[tId - 1]` to its chunk before storing it.
// `sum` carries the total of all previous rounds.
// NOTE(review): when a thread's chunk is empty (fin <= sta) the expression
// threadBuffer[(fin-sta)-1] uses a negative index, and the barriers inside
// `if (fin>sta)` are then not reached by every thread of the group. Both look
// unintended -- confirm against the Metal rules for threadgroup_barrier.
const char* render_shader_radixsort_cumsum_metal =
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 point;\n"
"};\n"
"struct pointoffset\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"struct pointoffsetSum\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"kernel void main0(device pointoffsetSum& uPointoffsetSum [[buffer(0)]],const device pointoffset& uPointoffset [[buffer(1)]],constant constBuffer& uConstant [[buffer(2)]],uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])\n"
"{\n"
" threadgroup uint local_sum[LOCAL_SIZE];\n"
" int tId=int(gl_LocalInvocationID.x);\n"
" int size=(uConstant.point.x+3)/4;\n"
" int curOffset=0;\n"
" uint sum=0u;\n"
" uint4 threadBuffer[UNIT];\n"
" uint _233;\n"
" while (curOffset<size)\n"
" {\n"
" int sta=(tId*UNIT)+curOffset;\n"
" int fin=min((sta+UNIT),size);\n"
" for (int i=sta; i<fin; i++)\n"
" {\n"
" int lpos=i-sta;\n"
" uint4 p0=uPointoffset.data[i];\n"
" p0.y += p0.x;\n"
" p0.z += p0.y;\n"
" p0.w += p0.z;\n"
" threadBuffer[lpos]=p0;\n"
" }\n"
" int _112=sta+1;\n"
" for (int i_1=_112; i_1<fin; i_1++)\n"
" {\n"
" int lpos_1=i_1-sta;\n"
" uint4 p0_1=threadBuffer[lpos_1];\n"
" uint4 p1=threadBuffer[lpos_1-1];\n"
" p0_1 += uint4(p1.w);\n"
" threadBuffer[lpos_1]=p0_1;\n"
" }\n"
" local_sum[tId]=threadBuffer[(fin-sta)-1].w;\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" if (fin>sta)\n"
" {\n"
" for (uint stride=1u; stride <= LOCAL_SIZE/2u; stride *= 2u)\n"
" {\n"
" uint id=((uint(tId+1)*stride)*2u)-1u;\n"
" if (id<LOCAL_SIZE)\n"
" {\n"
" local_sum[id] += local_sum[id-stride];\n"
" }\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" }\n"
" for (uint stride_1=LOCAL_SIZE/4u; stride_1>0u; stride_1 /= 2u)\n"
" {\n"
" uint id_1=((uint(tId+1)*stride_1)*2u)-1u;\n"
" if ((id_1+stride_1)<LOCAL_SIZE)\n"
" {\n"
" uint _220=id_1+stride_1;\n"
" local_sum[_220] += local_sum[id_1];\n"
" }\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" }\n"
" if (tId>0)\n"
" {\n"
" _233=local_sum[tId-1];\n"
" }\n"
" else\n"
" {\n"
" _233=0u;\n"
" }\n"
" uint sum0=_233;\n"
" for (int i_2=sta; i_2<fin; i_2++)\n"
" {\n"
" int lpos_2=i_2-sta;\n"
" uPointoffsetSum.data[i_2]=threadBuffer[lpos_2]+uint4(sum+sum0);\n"
" }\n"
" sum += local_sum[LOCAL_SIZE-1];\n"
" }\n"
" curOffset += LOCAL_SIZE*UNIT;\n"
" if (curOffset<size)\n"
" {\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" }\n"
" }\n"
"}\n"
;

// Scatter step of the radix sort, parameterised by the external macros
// LOCAL_SIZE and BIN_NUMBER.
// Each thread loads its starting write offset for every bin from
// uHistogram[bin * threadNumber + threadId - 1] (0 for the very first entry),
// then walks its slice of uint2 (key, value) pairs, computes
// digit = (key >> uPass.off.x) & (BIN_NUMBER - 1), copies the pair to
// uPointKeysOutput[offsets[digit]] and bumps that offset.
// NOTE(review): the mask only equals a modulo when BIN_NUMBER is a power of
// two -- presumably guaranteed by the caller; confirm.
const char* render_shader_radixsort_reorder_option_metal =
"using namespace metal;\n"
"template<typename T,size_t Num>\n"
"struct spvUnsafeArray\n"
"{\n"
" T elements[Num ? Num : 1];\n"
" \n"
" thread T& operator [] (size_t pos) thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const thread T& operator [] (size_t pos) const thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" device T& operator [] (size_t pos) device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const device T& operator [] (size_t pos) const device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" constexpr const constant T& operator [] (size_t pos) const constant\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" threadgroup T& operator [] (size_t pos) threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const threadgroup T& operator [] (size_t pos) const threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
"};\n"
"struct variableBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct variablepBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct his\n"
"{\n"
" uint data[1];\n"
"};\n"
"struct pointO\n"
"{\n"
" uint2 data[1];\n"
"};\n"
"struct pointI\n"
"{\n"
" uint2 data[1];\n"
"};\n"
"kernel void main0(device pointI& uPointKeysOutput [[buffer(0)]],const device pointO& uPointKeysInput [[buffer(1)]],const device his& uHistogram [[buffer(2)]],constant variableBuffer& uOffset [[buffer(3)]],constant variablepBuffer& uPass [[buffer(4)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
" uint threadNumber=groupSize*LOCAL_SIZE;\n"
" uint totalSize=(uOffset.off.x+1u)/2u;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
" uint div=uPass.off.x;\n"
" sta *= 2u;\n"
" fin *= 2u;\n"
" uint modNum=BIN_NUMBER-1u;\n"
" spvUnsafeArray<uint,BIN_NUMBER> offsets;\n"
" for (int i=0; i<BIN_NUMBER; i++)\n"
" {\n"
" uint pos=(uint(i)*threadNumber)+threadId;\n"
" if (pos == 0u)\n"
" {\n"
" offsets[i]=0u;\n"
" }\n"
" else\n"
" {\n"
" offsets[i]=uHistogram.data[pos-1u];\n"
" }\n"
" }\n"
" for (uint i_1=sta; i_1<fin; i_1++)\n"
" {\n"
" uint2 V=uPointKeysInput.data[i_1];\n"
" uint key=(V.x >> div) & modNum;\n"
" uint pos_1=offsets[key];\n"
" uPointKeysOutput.data[pos_1]=V;\n"
" offsets[key]++;\n"
" }\n"
"}\n"
;

// Fixed-size variant of the running-sum kernel: 256 threads, 128 uint4
// elements per thread per round (32768 uint4 per round).
// Per-chunk scanning matches render_shader_radixsort_cumsum_metal, but the
// cross-thread step is serial: each thread adds local_sum[0 .. tId-1] to
// `sum` before writing, then adds local_sum[tId .. 255] so that `sum` ends
// the round holding the total so far.
// NOTE(review): as in the macro-based variant, threadBuffer[(fin-sta)-1] is
// indexed with a negative value for threads whose chunk is empty.
const char* render_shader_cumsum_metal =
"using namespace metal;\n"
"template<typename T,size_t Num>\n"
"struct spvUnsafeArray\n"
"{\n"
" T elements[Num ? Num : 1];\n"
" \n"
" thread T& operator [] (size_t pos) thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const thread T& operator [] (size_t pos) const thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" device T& operator [] (size_t pos) device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const device T& operator [] (size_t pos) const device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" constexpr const constant T& operator [] (size_t pos) const constant\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" threadgroup T& operator [] (size_t pos) threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const threadgroup T& operator [] (size_t pos) const threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
"};\n"
"struct constBuffer\n"
"{\n"
" int4 point;\n"
"};\n"
"struct pointoffset\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"struct pointoffsetSum\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device pointoffsetSum& uPointoffsetSum [[buffer(0)]],const device pointoffset& uPointoffset [[buffer(1)]],constant constBuffer& uConstant [[buffer(2)]],uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])\n"
"{\n"
" threadgroup uint local_sum[256];\n"
" int tId=int(gl_LocalInvocationID.x);\n"
" int size=(uConstant.point.x+3)/4;\n"
" int curOffset=0;\n"
" uint sum=0u;\n"
" spvUnsafeArray<uint4,128> threadBuffer;\n"
" while (curOffset<size)\n"
" {\n"
" int sta=(tId*128)+curOffset;\n"
" int fin=min((sta+128),size);\n"
" for (int i=sta; i<fin; i++)\n"
" {\n"
" int lpos=i-sta;\n"
" uint4 p0=uPointoffset.data[i];\n"
" p0.y += p0.x;\n"
" p0.z += p0.y;\n"
" p0.w += p0.z;\n"
" threadBuffer[lpos]=p0;\n"
" }\n"
" int _112=sta+1;\n"
" for (int i_1=_112; i_1<fin; i_1++)\n"
" {\n"
" int lpos_1=i_1-sta;\n"
" uint4 p0_1=threadBuffer[lpos_1];\n"
" uint4 p1=threadBuffer[lpos_1-1];\n"
" p0_1 += uint4(p1.w);\n"
" threadBuffer[lpos_1]=p0_1;\n"
" }\n"
" local_sum[tId]=threadBuffer[(fin-sta)-1].w;\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" if (fin>sta)\n"
" {\n"
" for (int i_2=0; i_2<tId; i_2++)\n"
" {\n"
" sum += local_sum[i_2];\n"
" }\n"
" for (int i_3=sta; i_3<fin; i_3++)\n"
" {\n"
" int lpos_2=i_3-sta;\n"
" uPointoffsetSum.data[i_3]=threadBuffer[lpos_2]+uint4(sum);\n"
" }\n"
" for (int i_4=tId; i_4<256; i_4++)\n"
" {\n"
" sum += local_sum[i_4];\n"
" }\n"
" }\n"
" curOffset += 32768;\n"
" if (curOffset<size)\n"
" {\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" }\n"
" }\n"
"}\n"
;

// Counts, per thread, how many points in its slice pass the depth test.
// The slice partitioning and the depth computation are the same expressions
// used by render_shader_rastersort_collect_key_metal; the count of points
// with 0 <= depth <= 1 is stored in uHistogram[threadId].
// Requires LOCAL_SIZE and TYPE to be defined externally.
const char* render_shader_rastersort_count_valid_number_metal =
"using namespace metal;\n"
"constant uint _25=(uint(LOCAL_SIZE)+0u);\n"
"struct constBuffer\n"
"{\n"
" uint4 point;\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float4 data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" TYPE data[1];\n"
"};\n"
"struct histogram\n"
"{\n"
" uint data[1];\n"
"};\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_0\n"
"#define SPIRV_CROSS_CONSTANT_ID_0 1u\n"
"#endif\n"
"constant uint _168=SPIRV_CROSS_CONSTANT_ID_0;\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_1\n"
"#define SPIRV_CROSS_CONSTANT_ID_1 1u\n"
"#endif\n"
"constant uint _169=SPIRV_CROSS_CONSTANT_ID_1;\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_2\n"
"#define SPIRV_CROSS_CONSTANT_ID_2 1u\n"
"#endif\n"
"constant uint _170=SPIRV_CROSS_CONSTANT_ID_2;\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(_168,_169,_170);\n"
"kernel void main0(device histogram& uHistogram [[buffer(0)]],const device sourceBuffer0& uAttr [[buffer(1)]],const device sourceBuffer1& uViewProj [[buffer(2)]],constant constBuffer& uConstant [[buffer(3)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
" uint binSize=0u;\n"
" uint threadNumber=groupSize*LOCAL_SIZE;\n"
" uint totalSize=uConstant.point.x;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
" for (uint pos=sta; pos<fin; pos++)\n"
" {\n"
" float4 vp0=uViewProj.data[0];\n"
" float4 vp1=uViewProj.data[1];\n"
" float4 vp2=uViewProj.data[2];\n"
" float4 vp3=uViewProj.data[3];\n"
" float4 attr=float4(uAttr.data[pos]);\n"
" float depth=(((attr.x*vp0.z)+(attr.y*vp1.z))+(attr.z*vp2.z))+vp3.z;\n"
" float dw=(((attr.x*vp0.w)+(attr.y*vp1.w))+(attr.z*vp2.w))+vp3.w;\n"
" depth /= dw;\n"
" if ((depth >= 0.0) && (depth <= 1.0))\n"
" {\n"
" binSize++;\n"
" }\n"
" }\n"
" uHistogram.data[threadId]=binSize;\n"
"}\n"
;

// int -> float conversion.
// Thread `pos`, when pos < size.x, writes
// uOutput[pos] = float(uInput[pos]) * scale.x + scale.y.
const char* render_shader_int2float_metal =
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 size;\n"
" float4 scale;\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" int data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device destBuffer& uOutput [[buffer(0)]],const device sourceBuffer0& uInput [[buffer(1)]],constant constBuffer& uConstant [[buffer(2)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 size=uConstant.size;\n"
" if (pos<size.x)\n"
" {\n"
" uOutput.data[pos]=(float(uInput.data[pos])*uConstant.scale.x)+uConstant.scale.y;\n"
" }\n"
"}\n"
;

// Forward differences along x and y.
// The flat thread index is decomposed into (x, y, z, n) using size.xyzw, and
// elements are addressed as ((n*size.x*size.y) + y*size.x + x) * size.z + z.
// uDx = input[x+1] - input[x] (0 on the last column),
// uDy = input[y+1] - input[y] (0 on the last row).
const char* render_shader_dfdxdy_metal =
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 size;\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device destBuffer& uDx [[buffer(0)]],device destBuffer1& uDy [[buffer(1)]],const device sourceBuffer0& uInput [[buffer(2)]],constant constBuffer& uConstant [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 size=uConstant.size;\n"
" int total=((size.x*size.y)*size.z)*size.w;\n"
" if (pos<total)\n"
" {\n"
" int x=pos % size.x;\n"
" int tmp=pos/size.x;\n"
" int y=tmp % size.y;\n"
" tmp /= size.y;\n"
" int z=tmp % size.z;\n"
" int n=tmp/size.z;\n"
" int outPos=(((((n*size.x)*size.y)*size.z)+(x*size.z))+((y*size.x)*size.z))+z;\n"
" int xDPos=(((((n*size.x)*size.y)*size.z)+((x+1)*size.z))+((y*size.x)*size.z))+z;\n"
" int yDPos=(((((n*size.x)*size.y)*size.z)+(x*size.z))+(((y+1)*size.x)*size.z))+z;\n"
" if (x<(size.x-1))\n"
" {\n"
" uDx.data[outPos]=uInput.data[xDPos]-uInput.data[outPos];\n"
" }\n"
" else\n"
" {\n"
" uDx.data[outPos]=0.0;\n"
" }\n"
" if (y<(size.y-1))\n"
" {\n"
" uDy.data[outPos]=uInput.data[yDPos]-uInput.data[outPos];\n"
" }\n"
" else\n"
" {\n"
" uDy.data[outPos]=0.0;\n"
" }\n"
" }\n"
"}\n"
;

// Shift/mask variant of the per-thread digit histogram, parameterised by the
// external macros LOCAL_SIZE and BIN_NUMBER.
// digit = (key >> uPass.off.x) & (BIN_NUMBER - 1) for the .x and .z
// components of every uint4 element in the thread's slice; counts are stored
// bin-major at uHistogram[bin * threadNumber + threadId].
const char* render_shader_radixsort_histogram_option_metal =
"using namespace metal;\n"
"template<typename T,size_t Num>\n"
"struct spvUnsafeArray\n"
"{\n"
" T elements[Num ? Num : 1];\n"
" \n"
" thread T& operator [] (size_t pos) thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const thread T& operator [] (size_t pos) const thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" device T& operator [] (size_t pos) device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const device T& operator [] (size_t pos) const device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" constexpr const constant T& operator [] (size_t pos) const constant\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" threadgroup T& operator [] (size_t pos) threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const threadgroup T& operator [] (size_t pos) const threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
"};\n"
"struct variableBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct variablepBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct pointO\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"struct pointI\n"
"{\n"
" uint data[1];\n"
"};\n"
"kernel void main0(device pointI& uHistogram [[buffer(0)]],const device pointO& uPointKeysInput [[buffer(1)]],constant variableBuffer& uOffset [[buffer(2)]],constant variablepBuffer& uPass [[buffer(3)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
" spvUnsafeArray<uint,BIN_NUMBER> binSize;\n"
" for (int i=0; i<BIN_NUMBER; i++)\n"
" {\n"
" binSize[i]=0u;\n"
" }\n"
" uint totalSize=(uOffset.off.x+1u)/2u;\n"
" uint threadNumber=groupSize*LOCAL_SIZE;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
" uint div=uPass.off.x;\n"
" uint modNum=BIN_NUMBER-1u;\n"
" for (uint i_1=sta; i_1<fin; i_1++)\n"
" {\n"
" uint2 key=(uPointKeysInput.data[i_1].xz >> uint2(div)) & uint2(modNum);\n"
" binSize[key.x]++;\n"
" binSize[key.y]++;\n"
" }\n"
" for (int i_2=0; i_2<BIN_NUMBER; i_2++)\n"
" {\n"
" uHistogram.data[(uint(i_2)*threadNumber)+threadId]=binSize[i_2];\n"
" }\n"
"}\n"
;

// Combines uDx/uDy back into a single buffer (by name, the gradient of the
// forward-difference kernel). uDx and uDy are read-only; uInput is written.
// result = (x > 0 ? uDx[x-1] - uDx[x] : 0) + (y > 0 ? uDy[y-1] - uDy[y] : 0).
// NOTE(review): if this is meant to be the exact adjoint of
// render_shader_dfdxdy_metal, the boundaries differ: at x == 0 (y == 0) the
// -uDx[x] (-uDy[y]) term is dropped, and at the last column/row it is still
// subtracted although the forward kernel writes 0 there. Confirm intent.
const char* render_shader_dfdxdygrad_metal =
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 size;\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(const device destBuffer& uDx [[buffer(0)]],const device destBuffer1& uDy [[buffer(1)]],device sourceBuffer0& uInput [[buffer(2)]],constant constBuffer& uConstant [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 size=uConstant.size;\n"
" int total=((size.x*size.y)*size.z)*size.w;\n"
" if (pos<total)\n"
" {\n"
" int x=pos % size.x;\n"
" int tmp=pos/size.x;\n"
" int y=tmp % size.y;\n"
" tmp /= size.y;\n"
" int z=tmp % size.z;\n"
" int n=tmp/size.z;\n"
" int outPos=(((((n*size.x)*size.y)*size.z)+(x*size.z))+((y*size.x)*size.z))+z;\n"
" int xDPos=(((((n*size.x)*size.y)*size.z)+((x-1)*size.z))+((y*size.x)*size.z))+z;\n"
" int yDPos=(((((n*size.x)*size.y)*size.z)+(x*size.z))+(((y-1)*size.x)*size.z))+z;\n"
" float summer=0.0;\n"
" if (x>0)\n"
" {\n"
" summer=(summer+uDx.data[xDPos])-uDx.data[outPos];\n"
" }\n"
" if (y>0)\n"
" {\n"
" summer=(summer+uDy.data[yDPos])-uDy.data[outPos];\n"
" }\n"
" uInput.data[outPos]=summer;\n"
" }\n"
"}\n"
;

// Bilinear 2D sampler.
// For each element of outShape the kernel reads (gridX, gridY) from uGrid,
// maps them to texel space as grid * inShape - 0.5, and blends the four
// neighbouring texels (positions clamped to the input width/height by
// LoadSample) with weights derived from the fractional position.
const char* render_shader_texture2d_metal =
"using namespace metal;\n"
"struct gridSampleBuffer\n"
"{\n"
" int4 inShape;\n"
" int4 outShape;\n"
" uint alignCorners;\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"static inline __attribute__((always_inline))\n"
"float LoadSample(thread int& positionX,thread int& positionY,thread const int& c,thread const int& n,constant gridSampleBuffer& uGridSampleParam,const device sourceBuffer0& uInput)\n"
"{\n"
" int width=uGridSampleParam.inShape.x;\n"
" int height=uGridSampleParam.inShape.y;\n"
" positionX=clamp(positionX,0,width-1);\n"
" positionY=clamp(positionY,0,height-1);\n"
" float V=uInput.data[(((0+(positionX*uGridSampleParam.inShape.z))+((positionY*width)*uGridSampleParam.inShape.z))+(((n*width)*height)*uGridSampleParam.inShape.z))+c];\n"
" return V;\n"
"}\n"
"kernel void main0(device destBuffer& uOutput [[buffer(0)]],const device sourceBuffer0& uInput [[buffer(1)]],const device sourceBuffer1& uGrid [[buffer(2)]],constant gridSampleBuffer& uGridSampleParam [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 inputShape=uGridSampleParam.inShape;\n"
" int4 outputShape=uGridSampleParam.outShape;\n"
" int total=((outputShape.x*outputShape.y)*outputShape.z)*outputShape.w;\n"
" if (pos<total)\n"
" {\n"
" int x=pos % outputShape.x;\n"
" int tmp=pos/outputShape.x;\n"
" int y=tmp % outputShape.y;\n"
" tmp /= outputShape.y;\n"
" int z=tmp % outputShape.z;\n"
" int n=tmp/outputShape.z;\n"
" int gridPosition=(((n*outputShape.x)*outputShape.y)+(y*outputShape.x))+x;\n"
" float gridX=uGrid.data[(inputShape.w*gridPosition)+0];\n"
" float gridY=uGrid.data[(inputShape.w*gridPosition)+1];\n"
" float cordH=(gridY*float(inputShape.y))-0.5;\n"
" float cordW=(gridX*float(inputShape.x))-0.5;\n"
" int w0_h=int(floor(cordH));\n"
" int w0_w=int(floor(cordW));\n"
" int w1_h=w0_h+1;\n"
" int w1_w=w0_w+1;\n"
" float oneV=1.0;\n"
" int param=w0_w;\n"
" int param_1=w0_h;\n"
" int param_2=z;\n"
" int param_3=n;\n"
" float _215=LoadSample(param,param_1,param_2,param_3,uGridSampleParam,uInput);\n"
" float i00=_215;\n"
" int param_4=w1_w;\n"
" int param_5=w0_h;\n"
" int param_6=z;\n"
" int param_7=n;\n"
" float _225=LoadSample(param_4,param_5,param_6,param_7,uGridSampleParam,uInput);\n"
" float i01=_225;\n"
" int param_8=w0_w;\n"
" int param_9=w1_h;\n"
" int param_10=z;\n"
" int param_11=n;\n"
" float _235=LoadSample(param_8,param_9,param_10,param_11,uGridSampleParam,uInput);\n"
" float i10=_235;\n"
" int param_12=w1_w;\n"
" int param_13=w1_h;\n"
" int param_14=z;\n"
" int param_15=n;\n"
" float _245=LoadSample(param_12,param_13,param_14,param_15,uGridSampleParam,uInput);\n"
" float i11=_245;\n"
" float f0=float(w1_w)-cordW;\n"
" float f1=oneV-f0;\n"
" float h0=float(w1_h)-cordH;\n"
" float h1=oneV-h0;\n"
" float i0=(i00*f0)+(i01*f1);\n"
" float i1=(i10*f0)+(i11*f1);\n"
" float V=(i0*h0)+(i1*h1);\n"
" uOutput.data[(((0+(x*outputShape.z))+((y*outputShape.x)*outputShape.z))+z)+(((n*outputShape.x)*outputShape.y)*outputShape.z)]=V;\n"
" }\n"
"}\n"
;

// Scatter pass matching the cube-map sampler (by name, its gradient).
// `uOutput` is read-only and `uInput` (an int buffer) is written.
// The direction from uGrid is mapped to a face and (s, t) by indexCubeMap;
// when a face is found, the value V read from uOutput is split by the four
// bilinear weights and each share is atomically added, as
// int(share * 16777216.0) (scaled by 2^24), into image n = on * 6 + face of
// uInput at the clamped texel position. Nothing is written when face is -1.
const char* render_shader_texturecubegrad_metal =
"using namespace metal;\n"
"struct gridSampleBuffer\n"
"{\n"
" int4 inShape;\n"
" int4 outShape;\n"
" uint alignCorners;\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" int data[1];\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"static inline __attribute__((always_inline))\n"
"void indexCubeMap(thread const float3& d,thread int& face,thread float& s,thread float& t)\n"
"{\n"
" float3 absd;\n"
" absd.x=abs(d.x);\n"
" absd.y=abs(d.y);\n"
" absd.z=abs(d.z);\n"
" face=-1;\n"
" bool _50=absd.x >= absd.y;\n"
" bool _58;\n"
" if (_50)\n"
" {\n"
" _58=absd.x >= absd.z;\n"
" }\n"
" else\n"
" {\n"
" _58=_50;\n"
" }\n"
" float sc;\n"
" float tc;\n"
" float ma;\n"
" if (_58)\n"
" {\n"
" if (d.x>0.0)\n"
" {\n"
" face=0;\n"
" sc=-d.z;\n"
" tc=-d.y;\n"
" ma=absd.x;\n"
" }\n"
" else\n"
" {\n"
" face=1;\n"
" sc=d.z;\n"
" tc=-d.y;\n"
" ma=absd.x;\n"
" }\n"
" }\n"
" bool _92=absd.y >= absd.x;\n"
" bool _100;\n"
" if (_92)\n"
" {\n"
" _100=absd.y >= absd.z;\n"
" }\n"
" else\n"
" {\n"
" _100=_92;\n"
" }\n"
" if (_100)\n"
" {\n"
" if (d.y>0.0)\n"
" {\n"
" face=2;\n"
" sc=d.x;\n"
" tc=d.z;\n"
" ma=absd.y;\n"
" }\n"
" else\n"
" {\n"
" face=3;\n"
" sc=d.x;\n"
" tc=-d.z;\n"
" ma=absd.y;\n"
" }\n"
" }\n"
" bool _128=absd.z >= absd.x;\n"
" bool _136;\n"
" if (_128)\n"
" {\n"
" _136=absd.z >= absd.y;\n"
" }\n"
" else\n"
" {\n"
" _136=_128;\n"
" }\n"
" if (_136)\n"
" {\n"
" if (d.z>0.0)\n"
" {\n"
" face=4;\n"
" sc=d.x;\n"
" tc=-d.y;\n"
" ma=absd.z;\n"
" }\n"
" else\n"
" {\n"
" face=5;\n"
" sc=-d.x;\n"
" tc=-d.y;\n"
" ma=absd.z;\n"
" }\n"
" }\n"
" if (ma == 0.0)\n"
" {\n"
" s=0.0;\n"
" t=0.0;\n"
" face=-1;\n"
" }\n"
" else\n"
" {\n"
" s=((sc/ma)+1.0)*0.5;\n"
" t=((tc/ma)+1.0)*0.5;\n"
" }\n"
"}\n"
"static inline __attribute__((always_inline))\n"
"void WriteSample(thread int& positionX,thread int& positionY,thread const int& c,thread const int& n,thread const float& V_f,constant gridSampleBuffer& uGridSampleParam,device sourceBuffer0& uInput)\n"
"{\n"
" int V=int(V_f*16777216.0);\n"
" int width=uGridSampleParam.inShape.x;\n"
" int height=uGridSampleParam.inShape.y;\n"
" positionX=clamp(positionX,0,width-1);\n"
" positionY=clamp(positionY,0,height-1);\n"
" int _232=atomic_fetch_add_explicit((device atomic_int*)&uInput.data[(((0+(positionX*uGridSampleParam.inShape.z))+((positionY*width)*uGridSampleParam.inShape.z))+(((n*width)*height)*uGridSampleParam.inShape.z))+c],V,memory_order_relaxed);\n"
"}\n"
"kernel void main0(const device destBuffer& uOutput [[buffer(0)]],device sourceBuffer0& uInput [[buffer(1)]],const device sourceBuffer1& uGrid [[buffer(2)]],constant gridSampleBuffer& uGridSampleParam [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 inputShape=uGridSampleParam.inShape;\n"
" int4 outputShape=uGridSampleParam.outShape;\n"
" int total=((outputShape.x*outputShape.y)*outputShape.z)*outputShape.w;\n"
" if (pos<total)\n"
" {\n"
" int x=pos % outputShape.x;\n"
" int tmp=pos/outputShape.x;\n"
" int y=tmp % outputShape.y;\n"
" tmp /= outputShape.y;\n"
" int z=tmp % outputShape.z;\n"
" int on=tmp/outputShape.z;\n"
" int gridPosition=(((on*outputShape.x)*outputShape.y)+(y*outputShape.x))+x;\n"
" float u=uGrid.data[(inputShape.w*gridPosition)+0];\n"
" float v=uGrid.data[(inputShape.w*gridPosition)+1];\n"
" float w=uGrid.data[(inputShape.w*gridPosition)+2];\n"
" float3 param=float3(u,v,w);\n"
" int param_1;\n"
" float param_2;\n"
" float param_3;\n"
" indexCubeMap(param,param_1,param_2,param_3);\n"
" int face=param_1;\n"
" float gridX=param_2;\n"
" float gridY=param_3;\n"
" float V=uOutput.data[(((0+(x*outputShape.z))+((y*outputShape.x)*outputShape.z))+z)+(((on*outputShape.x)*outputShape.y)*outputShape.z)];\n"
" if (face >= 0)\n"
" {\n"
" int n=(on*6)+face;\n"
" float cordH=(gridY*float(inputShape.y))-0.5;\n"
" float cordW=(gridX*float(inputShape.x))-0.5;\n"
" int w0_h=int(floor(cordH));\n"
" int w0_w=int(floor(cordW));\n"
" int w1_h=w0_h+1;\n"
" int w1_w=w0_w+1;\n"
" float f0=float(w1_w)-cordW;\n"
" float f1=1.0-f0;\n"
" float h0=float(w1_h)-cordH;\n"
" float h1=1.0-h0;\n"
" float f00=(f0*h0)*V;\n"
" float f01=(f1*h0)*V;\n"
" float f10=(f0*h1)*V;\n"
" float f11=(f1*h1)*V;\n"
" int param_4=w0_w;\n"
" int param_5=w0_h;\n"
" int param_6=z;\n"
" int param_7=n;\n"
" float param_8=f00;\n"
" WriteSample(param_4,param_5,param_6,param_7,param_8,uGridSampleParam,uInput);\n"
" int param_9=w1_w;\n"
" int param_10=w0_h;\n"
" int param_11=z;\n"
" int param_12=n;\n"
" float param_13=f01;\n"
" WriteSample(param_9,param_10,param_11,param_12,param_13,uGridSampleParam,uInput);\n"
" int param_14=w0_w;\n"
" int param_15=w1_h;\n"
" int param_16=z;\n"
" int param_17=n;\n"
" float param_18=f10;\n"
" WriteSample(param_14,param_15,param_16,param_17,param_18,uGridSampleParam,uInput);\n"
" int param_19=w1_w;\n"
" int param_20=w1_h;\n"
" int param_21=z;\n"
" int param_22=n;\n"
" float param_23=f11;\n"
" WriteSample(param_19,param_20,param_21,param_22,param_23,uGridSampleParam,uInput);\n"
" }\n"
" }\n"
"}\n"
;