source/backend/metal/render/AllRenderShader.cpp (1,356 lines of code) (raw):
#include "AllRenderShader.hpp"
// Metal compute kernel source: scale-and-offset a float buffer and convert it to int.
// Bindings: buffer(0) = int output, buffer(1) = float input,
//           buffer(2) = constants { int4 size; float4 scale; }.
// For every thread index pos < size.x the kernel stores
//   out[pos] = int(in[pos] * scale.x + scale.y)
// using a plain int() conversion (the shader makes no explicit rounding call).
// Only size.x, scale.x and scale.y are read.
const char* render_shader_float2int_metal = 
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 size;\n"
" float4 scale;\n"
"};\n"
"struct destBuffer\n"
"{\n"
" int data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device destBuffer& uOutput [[buffer(0)]],const device sourceBuffer0& uInput [[buffer(1)]],constant constBuffer& uConstant [[buffer(2)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 size=uConstant.size;\n"
// Threads past the element count do nothing.
" if (pos<size.x)\n"
" {\n"
" uOutput.data[pos]=int((uInput.data[pos]*uConstant.scale.x)+uConstant.scale.y);\n"
" }\n"
"}\n"
;
// Metal compute kernel source: scatter pass for a bilinear 2D texture lookup.
// Bindings: buffer(0) = float "uOutput" (read-only in this kernel),
//           buffer(1) = int "uInput" (atomically accumulated into),
//           buffer(2) = float grid coordinates,
//           buffer(3) = constants { int4 inShape; int4 outShape; uint alignCorners; }.
// One thread handles one element pos < outShape.x*outShape.y*outShape.z*outShape.w.
// It reads the value V from uOutput, computes the four bilinear weights for the
// grid coordinate, and atomically adds weight*V (converted to a scaled int) to
// the four neighbouring texels of uInput.
// alignCorners is declared in the constant struct but never read by the shader.
// NOTE(review): the int accumulation with a 16777216.0 (2^24) scale looks like a
// fixed-point workaround so that atomic adds can be used; presumably a later
// int->float pass rescales the result - confirm against the host code.
const char* render_shader_texture2dgrad_metal = 
"using namespace metal;\n"
"struct gridSampleBuffer\n"
"{\n"
" int4 inShape;\n"
" int4 outShape;\n"
" uint alignCorners;\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" int data[1];\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
// WriteSample: clamps (positionX, positionY) into [0,width-1] x [0,height-1]
// (width = inShape.x, height = inShape.y), then atomically adds
// int(V_f * 16777216.0) to uInput at
//   positionX*inShape.z + positionY*width*inShape.z + n*width*height*inShape.z + c.
// The value returned by the atomic add is not used.
"static inline __attribute__((always_inline))\n"
"void WriteSample(thread int& positionX,thread int& positionY,thread const int& c,thread const int& n,thread const float& V_f,constant gridSampleBuffer& uGridSampleParam,device sourceBuffer0& uInput)\n"
"{\n"
" int V=int(V_f*16777216.0);\n"
" int width=uGridSampleParam.inShape.x;\n"
" int height=uGridSampleParam.inShape.y;\n"
" positionX=clamp(positionX,0,width-1);\n"
" positionY=clamp(positionY,0,height-1);\n"
" int _77=atomic_fetch_add_explicit((device atomic_int*)&uInput.data[(((0+(positionX*uGridSampleParam.inShape.z))+((positionY*width)*uGridSampleParam.inShape.z))+(((n*width)*height)*uGridSampleParam.inShape.z))+c],V,memory_order_relaxed);\n"
"}\n"
"kernel void main0(const device destBuffer& uOutput [[buffer(0)]],device sourceBuffer0& uInput [[buffer(1)]],const device sourceBuffer1& uGrid [[buffer(2)]],constant gridSampleBuffer& uGridSampleParam [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 inputShape=uGridSampleParam.inShape;\n"
" int4 outputShape=uGridSampleParam.outShape;\n"
" int total=((outputShape.x*outputShape.y)*outputShape.z)*outputShape.w;\n"
" if (pos<total)\n"
" {\n"
// Decompose the flat thread index into (x, y, z, n), x varying fastest.
" int x=pos % outputShape.x;\n"
" int tmp=pos/outputShape.x;\n"
" int y=tmp % outputShape.y;\n"
" tmp /= outputShape.y;\n"
" int z=tmp % outputShape.z;\n"
" int n=tmp/outputShape.z;\n"
// The grid is indexed per (n, y, x) with a stride of inShape.w floats per
// position; components 0 and 1 are the X and Y coordinates.
" int gridPosition=(((n*outputShape.x)*outputShape.y)+(y*outputShape.x))+x;\n"
" float gridX=uGrid.data[(inputShape.w*gridPosition)+0];\n"
" float gridY=uGrid.data[(inputShape.w*gridPosition)+1];\n"
" float V=uOutput.data[(((0+(x*outputShape.z))+((y*outputShape.x)*outputShape.z))+z)+(((n*outputShape.x)*outputShape.y)*outputShape.z)];\n"
// Map the grid coordinate to texel space (coordinate * extent - 0.5) and take
// the floor texel plus its +1 neighbour in each axis.
" float cordH=(gridY*float(inputShape.y))-0.5;\n"
" float cordW=(gridX*float(inputShape.x))-0.5;\n"
" int w0_h=int(floor(cordH));\n"
" int w0_w=int(floor(cordW));\n"
" int w1_h=w0_h+1;\n"
" int w1_w=w0_w+1;\n"
// f0/f1 weight the w0/w1 columns, h0/h1 weight the w0/w1 rows; each product
// below is already multiplied by V.
" float f0=float(w1_w)-cordW;\n"
" float f1=1.0-f0;\n"
" float h0=float(w1_h)-cordH;\n"
" float h1=1.0-h0;\n"
" float f00=(f0*h0)*V;\n"
" float f01=(f1*h0)*V;\n"
" float f10=(f0*h1)*V;\n"
" float f11=(f1*h1)*V;\n"
// Four scatter writes: (w0_w,w0_h), (w1_w,w0_h), (w0_w,w1_h), (w1_w,w1_h).
" int param=w0_w;\n"
" int param_1=w0_h;\n"
" int param_2=z;\n"
" int param_3=n;\n"
" float param_4=f00;\n"
" WriteSample(param,param_1,param_2,param_3,param_4,uGridSampleParam,uInput);\n"
" int param_5=w1_w;\n"
" int param_6=w0_h;\n"
" int param_7=z;\n"
" int param_8=n;\n"
" float param_9=f01;\n"
" WriteSample(param_5,param_6,param_7,param_8,param_9,uGridSampleParam,uInput);\n"
" int param_10=w0_w;\n"
" int param_11=w1_h;\n"
" int param_12=z;\n"
" int param_13=n;\n"
" float param_14=f10;\n"
" WriteSample(param_10,param_11,param_12,param_13,param_14,uGridSampleParam,uInput);\n"
" int param_15=w1_w;\n"
" int param_16=w1_h;\n"
" int param_17=z;\n"
" int param_18=n;\n"
" float param_19=f11;\n"
" WriteSample(param_15,param_16,param_17,param_18,param_19,uGridSampleParam,uInput);\n"
" }\n"
"}\n"
;
// Metal compute kernel source: emit (depth key, point index) pairs for the
// points whose projected depth lies in [0, 1].
// Bindings: buffer(0) = uint2 key/value output, buffer(1) = point attributes
//           (element type TYPE), buffer(2) = four float4 rows "uViewProj",
//           buffer(3) = uint histogram (read-only), buffer(4) = { uint4 point; }.
// TYPE and LOCAL_SIZE are not defined inside this string; they must be supplied
// when the shader is compiled (presumably as preprocessor macros by the host).
// Each thread walks a contiguous slice [sta, fin) of the point.x points and
// appends its accepted pairs starting at uHistogram[threadId-1] (0 for thread 0).
// NOTE(review): this only works if uHistogram already holds an inclusive prefix
// sum of the per-thread accepted counts - confirm against the host pipeline.
const char* render_shader_rastersort_collect_key_metal = 
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" uint4 point;\n"
"};\n"
"struct histogram\n"
"{\n"
" uint data[1];\n"
"};\n"
"struct pointkeys\n"
"{\n"
" uint2 data[1];\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float4 data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" TYPE data[1];\n"
"};\n"
// The three SPIRV_CROSS_CONSTANT_ID_* macros default to 1u and only feed the
// unused gl_WorkGroupSize constant below.
"#ifndef SPIRV_CROSS_CONSTANT_ID_0\n"
"#define SPIRV_CROSS_CONSTANT_ID_0 1u\n"
"#endif\n"
"constant uint _210=SPIRV_CROSS_CONSTANT_ID_0;\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_1\n"
"#define SPIRV_CROSS_CONSTANT_ID_1 1u\n"
"#endif\n"
"constant uint _211=SPIRV_CROSS_CONSTANT_ID_1;\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_2\n"
"#define SPIRV_CROSS_CONSTANT_ID_2 1u\n"
"#endif\n"
"constant uint _212=SPIRV_CROSS_CONSTANT_ID_2;\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(_210,_211,_212);\n"
"kernel void main0(device pointkeys& uPointKeys [[buffer(0)]],const device sourceBuffer0& uAttr [[buffer(1)]],const device sourceBuffer1& uViewProj [[buffer(2)]],const device histogram& uHistogram [[buffer(3)]],constant constBuffer& uConstant [[buffer(4)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
// Split the totalSize points evenly: each thread gets ceil(totalSize/threadNumber).
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
" uint threadNumber=groupSize*LOCAL_SIZE;\n"
" uint totalSize=uConstant.point.x;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
" uint offset=0u;\n"
" if (threadId>0u)\n"
" {\n"
" offset=uHistogram.data[threadId-1u];\n"
" }\n"
// The last histogram entry is taken as the total number of emitted pairs.
// When that total is odd, thread 0 writes one extra pair (key 32767, value 0)
// at index sortSize.
" uint sortSize=uHistogram.data[threadNumber-1u];\n"
" if (threadId == 0u)\n"
" {\n"
" if ((sortSize % 2u)>0u)\n"
" {\n"
" ((device uint*)&uPointKeys.data[sortSize])[0u]=32767u;\n"
" ((device uint*)&uPointKeys.data[sortSize])[1u]=0u;\n"
" }\n"
" }\n"
" uint2 kv;\n"
" for (uint pos=sta; pos<fin; pos++)\n"
" {\n"
" float4 vp0=uViewProj.data[0];\n"
" float4 vp1=uViewProj.data[1];\n"
" float4 vp2=uViewProj.data[2];\n"
" float4 vp3=uViewProj.data[3];\n"
" float4 attr=float4(uAttr.data[pos]);\n"
// depth = projected z divided by projected w, using attr.xyz and an implicit
// 1 for the fourth component.
" float depth=(((attr.x*vp0.z)+(attr.y*vp1.z))+(attr.z*vp2.z))+vp3.z;\n"
" float dw=(((attr.x*vp0.w)+(attr.y*vp1.w))+(attr.z*vp2.w))+vp3.w;\n"
" depth /= dw;\n"
" if ((depth >= 0.0) && (depth <= 1.0))\n"
" {\n"
// Key = depth quantised to 0..32767, value = original point index.
" kv.x=uint(depth*32767.0);\n"
" kv.y=pos;\n"
" uPointKeys.data[offset]=kv;\n"
" offset++;\n"
" }\n"
" }\n"
"}\n"
;
// Metal compute kernel source: per-thread 256-bin histogram of one radix digit.
// Bindings: buffer(0) = uint histogram output, buffer(1) = uint4 key input,
//           buffer(2) = { uint4 off; } (off.x = element count),
//           buffer(3) = { uint4 off; } (off.x = pass index).
// The input is read as uint4 elements and only the .x and .z components are
// used as keys, so (off.x + 1) / 2 elements are processed in total.
// NOTE(review): presumably each uint4 packs two (key, value) pairs - confirm.
// The digit for pass p is (key / 256^p) % 256. Thread t writes bin b to
// uHistogram[b * threadNumber + t], with threadNumber = threadgroups * 256.
const char* render_shader_radixsort_histogram_metal = 
"using namespace metal;\n"
// spvUnsafeArray: fixed-size array wrapper with operator[] overloads for each
// address space; no bounds checking is performed.
"template<typename T,size_t Num>\n"
"struct spvUnsafeArray\n"
"{\n"
" T elements[Num ? Num : 1];\n"
" \n"
" thread T& operator [] (size_t pos) thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const thread T& operator [] (size_t pos) const thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" device T& operator [] (size_t pos) device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const device T& operator [] (size_t pos) const device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" constexpr const constant T& operator [] (size_t pos) const constant\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" threadgroup T& operator [] (size_t pos) threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const threadgroup T& operator [] (size_t pos) const threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
"};\n"
"struct variableBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct variablepBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct pointO\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"struct pointI\n"
"{\n"
" uint data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device pointI& uHistogram [[buffer(0)]],const device pointO& uPointKeysInput [[buffer(1)]],constant variableBuffer& uOffset [[buffer(2)]],constant variablepBuffer& uPass [[buffer(3)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
// Private per-thread bin counters, zeroed before counting.
" spvUnsafeArray<uint,256> binSize;\n"
" for (int i=0; i<256; i++)\n"
" {\n"
" binSize[i]=0u;\n"
" }\n"
// Each thread gets a contiguous slice of ceil(totalSize/threadNumber) uint4s.
" uint totalSize=(uOffset.off.x+1u)/2u;\n"
" uint threadNumber=groupSize*256u;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
// div = 256^pass, computed by repeated multiplication.
" uint pass=uPass.off.x;\n"
" uint div=1u;\n"
" for (uint i_1=0u; i_1<pass; i_1++)\n"
" {\n"
" div *= 256u;\n"
" }\n"
" for (uint i_2=sta; i_2<fin; i_2++)\n"
" {\n"
" uint2 key=uPointKeysInput.data[i_2].xz/uint2(div);\n"
" key %= uint2(256u);\n"
" binSize[key.x]++;\n"
" binSize[key.y]++;\n"
" }\n"
// Store bin-major: all threads' counts for bin 0, then bin 1, and so on.
" for (int i_3=0; i_3<256; i_3++)\n"
" {\n"
" uHistogram.data[(uint(i_3)*threadNumber)+threadId]=binSize[i_3];\n"
" }\n"
"}\n"
;
// Metal compute kernel source: bilinear lookup into a cube map stored as six
// consecutive 2D faces per batch entry.
// Bindings: buffer(0) = float output, buffer(1) = float input faces,
//           buffer(2) = float grid holding a 3-component direction per position,
//           buffer(3) = constants { int4 inShape; int4 outShape; uint alignCorners; }.
// One thread handles one element pos < outShape.x*outShape.y*outShape.z*outShape.w.
// The direction is mapped to (face, s, t); the face image index is on*6 + face,
// where on is the output batch index. If no face is selected the output is 0.
// alignCorners is declared in the constant struct but never read by the shader.
const char* render_shader_texturecube_metal = 
"using namespace metal;\n"
"struct gridSampleBuffer\n"
"{\n"
" int4 inShape;\n"
" int4 outShape;\n"
" uint alignCorners;\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
// indexCubeMap: choose the face from the component of d with the largest
// absolute value and produce face-local coordinates s, t.
//   |x| largest: face 0 (d.x > 0) or 1, sc = -d.z or d.z, tc = -d.y
//   |y| largest: face 2 (d.y > 0) or 3, sc = d.x,         tc = d.z or -d.z
//   |z| largest: face 4 (d.z > 0) or 5, sc = d.x or -d.x, tc = -d.y
// The three tests are separate ifs using >=, so on ties a later test
// overwrites the result of an earlier one.
// Result: s = (sc/ma + 1) * 0.5, t = (tc/ma + 1) * 0.5, where ma is the largest
// absolute component; when ma == 0, face = -1 and s = t = 0.
// NOTE(review): sc, tc and ma are declared without initialisers; if none of
// the three tests passes (every >= comparison false, e.g. NaN components) ma
// is read unassigned - confirm such inputs cannot occur.
"static inline __attribute__((always_inline))\n"
"void indexCubeMap(thread const float3& d,thread int& face,thread float& s,thread float& t)\n"
"{\n"
" float3 absd;\n"
" absd.x=abs(d.x);\n"
" absd.y=abs(d.y);\n"
" absd.z=abs(d.z);\n"
" face=-1;\n"
// _57 == (absd.x >= absd.y && absd.x >= absd.z)
" bool _49=absd.x >= absd.y;\n"
" bool _57;\n"
" if (_49)\n"
" {\n"
" _57=absd.x >= absd.z;\n"
" }\n"
" else\n"
" {\n"
" _57=_49;\n"
" }\n"
" float sc;\n"
" float tc;\n"
" float ma;\n"
" if (_57)\n"
" {\n"
" if (d.x>0.0)\n"
" {\n"
" face=0;\n"
" sc=-d.z;\n"
" tc=-d.y;\n"
" ma=absd.x;\n"
" }\n"
" else\n"
" {\n"
" face=1;\n"
" sc=d.z;\n"
" tc=-d.y;\n"
" ma=absd.x;\n"
" }\n"
" }\n"
// _99 == (absd.y >= absd.x && absd.y >= absd.z)
" bool _91=absd.y >= absd.x;\n"
" bool _99;\n"
" if (_91)\n"
" {\n"
" _99=absd.y >= absd.z;\n"
" }\n"
" else\n"
" {\n"
" _99=_91;\n"
" }\n"
" if (_99)\n"
" {\n"
" if (d.y>0.0)\n"
" {\n"
" face=2;\n"
" sc=d.x;\n"
" tc=d.z;\n"
" ma=absd.y;\n"
" }\n"
" else\n"
" {\n"
" face=3;\n"
" sc=d.x;\n"
" tc=-d.z;\n"
" ma=absd.y;\n"
" }\n"
" }\n"
// _135 == (absd.z >= absd.x && absd.z >= absd.y)
" bool _127=absd.z >= absd.x;\n"
" bool _135;\n"
" if (_127)\n"
" {\n"
" _135=absd.z >= absd.y;\n"
" }\n"
" else\n"
" {\n"
" _135=_127;\n"
" }\n"
" if (_135)\n"
" {\n"
" if (d.z>0.0)\n"
" {\n"
" face=4;\n"
" sc=d.x;\n"
" tc=-d.y;\n"
" ma=absd.z;\n"
" }\n"
" else\n"
" {\n"
" face=5;\n"
" sc=-d.x;\n"
" tc=-d.y;\n"
" ma=absd.z;\n"
" }\n"
" }\n"
// A zero-length direction selects no face.
" if (ma == 0.0)\n"
" {\n"
" s=0.0;\n"
" t=0.0;\n"
" face=-1;\n"
" }\n"
" else\n"
" {\n"
" s=((sc/ma)+1.0)*0.5;\n"
" t=((tc/ma)+1.0)*0.5;\n"
" }\n"
"}\n"
// LoadSample: clamps (positionX, positionY) into [0,width-1] x [0,height-1]
// (width = inShape.x, height = inShape.y) and returns the input value at
//   positionX*inShape.z + positionY*width*inShape.z + n*width*height*inShape.z + c.
"static inline __attribute__((always_inline))\n"
"float LoadSample(thread int& positionX,thread int& positionY,thread const int& c,thread const int& n,constant gridSampleBuffer& uGridSampleParam,const device sourceBuffer0& uInput)\n"
"{\n"
" int width=uGridSampleParam.inShape.x;\n"
" int height=uGridSampleParam.inShape.y;\n"
" positionX=clamp(positionX,0,width-1);\n"
" positionY=clamp(positionY,0,height-1);\n"
" float V=uInput.data[(((0+(positionX*uGridSampleParam.inShape.z))+((positionY*width)*uGridSampleParam.inShape.z))+(((n*width)*height)*uGridSampleParam.inShape.z))+c];\n"
" return V;\n"
"}\n"
"kernel void main0(device destBuffer& uOutput [[buffer(0)]],const device sourceBuffer0& uInput [[buffer(1)]],const device sourceBuffer1& uGrid [[buffer(2)]],constant gridSampleBuffer& uGridSampleParam [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 inputShape=uGridSampleParam.inShape;\n"
" int4 outputShape=uGridSampleParam.outShape;\n"
" int total=((outputShape.x*outputShape.y)*outputShape.z)*outputShape.w;\n"
" if (pos<total)\n"
" {\n"
// Decompose the flat thread index into (x, y, z, on), x varying fastest.
" int x=pos % outputShape.x;\n"
" int tmp=pos/outputShape.x;\n"
" int y=tmp % outputShape.y;\n"
" tmp /= outputShape.y;\n"
" int z=tmp % outputShape.z;\n"
" int on=tmp/outputShape.z;\n"
// The grid is indexed per (on, y, x) with a stride of inShape.w floats per
// position; components 0..2 form the lookup direction.
" int gridPosition=(((on*outputShape.x)*outputShape.y)+(y*outputShape.x))+x;\n"
" float u=uGrid.data[(inputShape.w*gridPosition)+0];\n"
" float v=uGrid.data[(inputShape.w*gridPosition)+1];\n"
" float w=uGrid.data[(inputShape.w*gridPosition)+2];\n"
" float3 param=float3(u,v,w);\n"
" int param_1;\n"
" float param_2;\n"
" float param_3;\n"
" indexCubeMap(param,param_1,param_2,param_3);\n"
" int face=param_1;\n"
" float gridX=param_2;\n"
" float gridY=param_3;\n"
" float V=0.0;\n"
" if (face >= 0)\n"
" {\n"
" int n=(on*6)+face;\n"
// Map (s, t) to texel space (coordinate * extent - 0.5) and take the floor
// texel plus its +1 neighbour in each axis.
" float cordH=(gridY*float(inputShape.y))-0.5;\n"
" float cordW=(gridX*float(inputShape.x))-0.5;\n"
" int w0_h=int(floor(cordH));\n"
" int w0_w=int(floor(cordW));\n"
" int w1_h=w0_h+1;\n"
" int w1_w=w0_w+1;\n"
" float oneV=1.0;\n"
// Four clamped loads: i00=(w0_w,w0_h), i01=(w1_w,w0_h), i10=(w0_w,w1_h),
// i11=(w1_w,w1_h).
" int param_4=w0_w;\n"
" int param_5=w0_h;\n"
" int param_6=z;\n"
" int param_7=n;\n"
" float _401=LoadSample(param_4,param_5,param_6,param_7,uGridSampleParam,uInput);\n"
" float i00=_401;\n"
" int param_8=w1_w;\n"
" int param_9=w0_h;\n"
" int param_10=z;\n"
" int param_11=n;\n"
" float _411=LoadSample(param_8,param_9,param_10,param_11,uGridSampleParam,uInput);\n"
" float i01=_411;\n"
" int param_12=w0_w;\n"
" int param_13=w1_h;\n"
" int param_14=z;\n"
" int param_15=n;\n"
" float _421=LoadSample(param_12,param_13,param_14,param_15,uGridSampleParam,uInput);\n"
" float i10=_421;\n"
" int param_16=w1_w;\n"
" int param_17=w1_h;\n"
" int param_18=z;\n"
" int param_19=n;\n"
" float _431=LoadSample(param_16,param_17,param_18,param_19,uGridSampleParam,uInput);\n"
" float i11=_431;\n"
// Blend horizontally with f0/f1, then vertically with h0/h1.
" float f0=float(w1_w)-cordW;\n"
" float f1=oneV-f0;\n"
" float h0=float(w1_h)-cordH;\n"
" float h1=oneV-h0;\n"
" float i0=(i00*f0)+(i01*f1);\n"
" float i1=(i10*f0)+(i11*f1);\n"
" V=(i0*h0)+(i1*h1);\n"
" }\n"
" uOutput.data[(((0+(x*outputShape.z))+((y*outputShape.x)*outputShape.z))+z)+(((on*outputShape.x)*outputShape.y)*outputShape.z)]=V;\n"
" }\n"
"}\n"
;
// Metal compute kernel source: running (inclusive) sum over a uint buffer that
// is accessed as uint4 elements, using a single threadgroup.
// Bindings: buffer(0) = uint4 output sums, buffer(1) = uint4 input,
//           buffer(2) = { int4 point; } (point.x = number of uint values).
// LOCAL_SIZE and UNIT are not defined inside this string; they must be supplied
// when the shader is compiled (presumably as preprocessor macros by the host).
// Only thread_position_in_threadgroup is used for indexing, so every
// threadgroup would process the same data.
// NOTE(review): presumably dispatched as exactly one threadgroup of LOCAL_SIZE
// threads - confirm against the host code.
// The input is processed in tiles of LOCAL_SIZE*UNIT uint4 elements. Within a
// tile each thread scans its own UNIT elements, the per-thread totals are
// combined through threadgroup memory, and `sum` carries the running total
// from one tile to the next.
const char* render_shader_radixsort_cumsum_metal = 
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 point;\n"
"};\n"
"struct pointoffset\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"struct pointoffsetSum\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"kernel void main0(device pointoffsetSum& uPointoffsetSum [[buffer(0)]],const device pointoffset& uPointoffset [[buffer(1)]],constant constBuffer& uConstant [[buffer(2)]],uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])\n"
"{\n"
" threadgroup uint local_sum[LOCAL_SIZE];\n"
" int tId=int(gl_LocalInvocationID.x);\n"
// size = number of uint4 elements, rounding point.x up to a multiple of 4.
" int size=(uConstant.point.x+3)/4;\n"
" int curOffset=0;\n"
" uint sum=0u;\n"
" uint4 threadBuffer[UNIT];\n"
" uint _233;\n"
" while (curOffset<size)\n"
" {\n"
" int sta=(tId*UNIT)+curOffset;\n"
" int fin=min((sta+UNIT),size);\n"
// Step 1: inclusive scan inside each uint4 (x, x+y, x+y+z, x+y+z+w).
" for (int i=sta; i<fin; i++)\n"
" {\n"
" int lpos=i-sta;\n"
" uint4 p0=uPointoffset.data[i];\n"
" p0.y += p0.x;\n"
" p0.z += p0.y;\n"
" p0.w += p0.z;\n"
" threadBuffer[lpos]=p0;\n"
" }\n"
// Step 2: chain the uint4s of this thread by adding the previous element's .w.
" int _112=sta+1;\n"
" for (int i_1=_112; i_1<fin; i_1++)\n"
" {\n"
" int lpos_1=i_1-sta;\n"
" uint4 p0_1=threadBuffer[lpos_1];\n"
" uint4 p1=threadBuffer[lpos_1-1];\n"
" p0_1 += uint4(p1.w);\n"
" threadBuffer[lpos_1]=p0_1;\n"
" }\n"
// Publish this thread's total for the tile.
// NOTE(review): this read is unconditional; when the thread's range is empty
// (fin <= sta) the index (fin-sta)-1 is negative - confirm this is benign.
" local_sum[tId]=threadBuffer[(fin-sta)-1].w;\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
// NOTE(review): the barriers inside this branch are skipped by threads whose
// range is empty - confirm that is safe for the sizes actually dispatched.
" if (fin>sta)\n"
" {\n"
// Step 3a: tree pass over local_sum with strides 1, 2, 4, ... LOCAL_SIZE/2.
" for (uint stride=1u; stride <= LOCAL_SIZE/2u; stride *= 2u)\n"
" {\n"
" uint id=((uint(tId+1)*stride)*2u)-1u;\n"
" if (id<LOCAL_SIZE)\n"
" {\n"
" local_sum[id] += local_sum[id-stride];\n"
" }\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" }\n"
// Step 3b: second tree pass with strides LOCAL_SIZE/4, ..., 2, 1.
" for (uint stride_1=LOCAL_SIZE/4u; stride_1>0u; stride_1 /= 2u)\n"
" {\n"
" uint id_1=((uint(tId+1)*stride_1)*2u)-1u;\n"
" if ((id_1+stride_1)<LOCAL_SIZE)\n"
" {\n"
" uint _220=id_1+stride_1;\n"
" local_sum[_220] += local_sum[id_1];\n"
" }\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" }\n"
// Step 4: local_sum[tId-1] is used as the offset contributed by lower-numbered
// threads in this tile (0 for thread 0).
" if (tId>0)\n"
" {\n"
" _233=local_sum[tId-1];\n"
" }\n"
" else\n"
" {\n"
" _233=0u;\n"
" }\n"
" uint sum0=_233;\n"
" for (int i_2=sta; i_2<fin; i_2++)\n"
" {\n"
" int lpos_2=i_2-sta;\n"
" uPointoffsetSum.data[i_2]=threadBuffer[lpos_2]+uint4(sum+sum0);\n"
" }\n"
// Carry the tile total (last local_sum entry) into the next tile.
" sum += local_sum[LOCAL_SIZE-1];\n"
" }\n"
" curOffset += LOCAL_SIZE*UNIT;\n"
// Barrier before local_sum is overwritten by the next tile.
" if (curOffset<size)\n"
" {\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" }\n"
" }\n"
"}\n"
;
// Metal compute kernel source: scatter step of one radix-sort pass.
// Bindings: buffer(0) = uint2 pairs output, buffer(1) = uint2 pairs input,
//           buffer(2) = uint histogram (read-only),
//           buffer(3) = { uint4 off; } (off.x = element count),
//           buffer(4) = { uint4 off; } (off.x = bit shift of the current digit).
// LOCAL_SIZE and BIN_NUMBER are not defined inside this string; they must be
// supplied when the shader is compiled (presumably as preprocessor macros).
// The digit mask is BIN_NUMBER - 1.
// NOTE(review): the mask form assumes BIN_NUMBER is a power of two - confirm.
// Each thread loads its start offset for every bin from uHistogram, then copies
// each pair of its slice to the current offset of the pair's bin and advances it.
// NOTE(review): the offsets are read from the entry preceding
// (bin * threadNumber + threadId), which presumably requires uHistogram to hold
// an inclusive prefix sum of the bin-major histogram - confirm against the host.
const char* render_shader_radixsort_reorder_option_metal = 
"using namespace metal;\n"
// spvUnsafeArray: fixed-size array wrapper with operator[] overloads for each
// address space; no bounds checking is performed.
"template<typename T,size_t Num>\n"
"struct spvUnsafeArray\n"
"{\n"
" T elements[Num ? Num : 1];\n"
" \n"
" thread T& operator [] (size_t pos) thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const thread T& operator [] (size_t pos) const thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" device T& operator [] (size_t pos) device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const device T& operator [] (size_t pos) const device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" constexpr const constant T& operator [] (size_t pos) const constant\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" threadgroup T& operator [] (size_t pos) threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const threadgroup T& operator [] (size_t pos) const threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
"};\n"
"struct variableBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct variablepBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct his\n"
"{\n"
" uint data[1];\n"
"};\n"
"struct pointO\n"
"{\n"
" uint2 data[1];\n"
"};\n"
"struct pointI\n"
"{\n"
" uint2 data[1];\n"
"};\n"
"kernel void main0(device pointI& uPointKeysOutput [[buffer(0)]],const device pointO& uPointKeysInput [[buffer(1)]],const device his& uHistogram [[buffer(2)]],constant variableBuffer& uOffset [[buffer(3)]],constant variablepBuffer& uPass [[buffer(4)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
" uint threadNumber=groupSize*LOCAL_SIZE;\n"
// The slice is computed in units of two pairs ((off.x + 1) / 2 in total) and
// then doubled so that sta/fin index individual uint2 pairs.
" uint totalSize=(uOffset.off.x+1u)/2u;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
" uint div=uPass.off.x;\n"
" sta *= 2u;\n"
" fin *= 2u;\n"
" uint modNum=BIN_NUMBER-1u;\n"
// Per-thread write cursor for every bin; flat index 0 starts at offset 0.
" spvUnsafeArray<uint,BIN_NUMBER> offsets;\n"
" for (int i=0; i<BIN_NUMBER; i++)\n"
" {\n"
" uint pos=(uint(i)*threadNumber)+threadId;\n"
" if (pos == 0u)\n"
" {\n"
" offsets[i]=0u;\n"
" }\n"
" else\n"
" {\n"
" offsets[i]=uHistogram.data[pos-1u];\n"
" }\n"
" }\n"
// Scatter: the digit is taken from the .x component; the whole pair is moved.
" for (uint i_1=sta; i_1<fin; i_1++)\n"
" {\n"
" uint2 V=uPointKeysInput.data[i_1];\n"
" uint key=(V.x >> div) & modNum;\n"
" uint pos_1=offsets[key];\n"
" uPointKeysOutput.data[pos_1]=V;\n"
" offsets[key]++;\n"
" }\n"
"}\n"
;
// Metal compute kernel source: running (inclusive) sum over a uint buffer that
// is accessed as uint4 elements, with fixed sizes: 256 threads, 128 uint4
// elements per thread, i.e. tiles of 32768 uint4 elements.
// Bindings: buffer(0) = uint4 output sums, buffer(1) = uint4 input,
//           buffer(2) = { int4 point; } (point.x = number of uint values).
// Only thread_position_in_threadgroup is used for indexing, so every
// threadgroup would process the same data.
// NOTE(review): presumably dispatched as exactly one threadgroup of 256
// threads - confirm against the host code.
// Unlike a tree-based scan, each thread here adds up the per-thread totals of
// the lower-numbered threads itself with a serial loop over local_sum.
const char* render_shader_cumsum_metal = 
"using namespace metal;\n"
// spvUnsafeArray: fixed-size array wrapper with operator[] overloads for each
// address space; no bounds checking is performed.
"template<typename T,size_t Num>\n"
"struct spvUnsafeArray\n"
"{\n"
" T elements[Num ? Num : 1];\n"
" \n"
" thread T& operator [] (size_t pos) thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const thread T& operator [] (size_t pos) const thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" device T& operator [] (size_t pos) device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const device T& operator [] (size_t pos) const device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" constexpr const constant T& operator [] (size_t pos) const constant\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" threadgroup T& operator [] (size_t pos) threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const threadgroup T& operator [] (size_t pos) const threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
"};\n"
"struct constBuffer\n"
"{\n"
" int4 point;\n"
"};\n"
"struct pointoffset\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"struct pointoffsetSum\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device pointoffsetSum& uPointoffsetSum [[buffer(0)]],const device pointoffset& uPointoffset [[buffer(1)]],constant constBuffer& uConstant [[buffer(2)]],uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])\n"
"{\n"
" threadgroup uint local_sum[256];\n"
" int tId=int(gl_LocalInvocationID.x);\n"
// size = number of uint4 elements, rounding point.x up to a multiple of 4.
" int size=(uConstant.point.x+3)/4;\n"
" int curOffset=0;\n"
" uint sum=0u;\n"
" spvUnsafeArray<uint4,128> threadBuffer;\n"
" while (curOffset<size)\n"
" {\n"
" int sta=(tId*128)+curOffset;\n"
" int fin=min((sta+128),size);\n"
// Step 1: inclusive scan inside each uint4 (x, x+y, x+y+z, x+y+z+w).
" for (int i=sta; i<fin; i++)\n"
" {\n"
" int lpos=i-sta;\n"
" uint4 p0=uPointoffset.data[i];\n"
" p0.y += p0.x;\n"
" p0.z += p0.y;\n"
" p0.w += p0.z;\n"
" threadBuffer[lpos]=p0;\n"
" }\n"
// Step 2: chain the uint4s of this thread by adding the previous element's .w.
" int _112=sta+1;\n"
" for (int i_1=_112; i_1<fin; i_1++)\n"
" {\n"
" int lpos_1=i_1-sta;\n"
" uint4 p0_1=threadBuffer[lpos_1];\n"
" uint4 p1=threadBuffer[lpos_1-1];\n"
" p0_1 += uint4(p1.w);\n"
" threadBuffer[lpos_1]=p0_1;\n"
" }\n"
// Publish this thread's total for the tile.
// NOTE(review): this read is unconditional; when the thread's range is empty
// (fin <= sta) the index (fin-sta)-1 is negative - confirm this is benign.
" local_sum[tId]=threadBuffer[(fin-sta)-1].w;\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" if (fin>sta)\n"
" {\n"
// Add the totals of lower-numbered threads, write the results, then add the
// remaining totals so that `sum` ends up including the whole tile.
" for (int i_2=0; i_2<tId; i_2++)\n"
" {\n"
" sum += local_sum[i_2];\n"
" }\n"
" for (int i_3=sta; i_3<fin; i_3++)\n"
" {\n"
" int lpos_2=i_3-sta;\n"
" uPointoffsetSum.data[i_3]=threadBuffer[lpos_2]+uint4(sum);\n"
" }\n"
" for (int i_4=tId; i_4<256; i_4++)\n"
" {\n"
" sum += local_sum[i_4];\n"
" }\n"
" }\n"
// 32768 = 256 threads * 128 uint4 elements per thread.
" curOffset += 32768;\n"
// Barrier before local_sum is overwritten by the next tile.
" if (curOffset<size)\n"
" {\n"
" threadgroup_barrier(mem_flags::mem_threadgroup);\n"
" }\n"
" }\n"
"}\n"
;
// Metal compute kernel source: count, per thread, the points whose projected
// depth lies in [0, 1].
// Bindings: buffer(0) = uint per-thread counts output, buffer(1) = point
//           attributes (element type TYPE), buffer(2) = four float4 rows
//           "uViewProj", buffer(3) = { uint4 point; } (point.x = point count).
// TYPE and LOCAL_SIZE are not defined inside this string; they must be supplied
// when the shader is compiled (presumably as preprocessor macros by the host).
// Each thread walks a contiguous slice [sta, fin) of the points and stores its
// count at uHistogram[threadId]. The depth test is the same expression used by
// the collect-key kernel in this file.
const char* render_shader_rastersort_count_valid_number_metal = 
"using namespace metal;\n"
// _25 is declared but not referenced anywhere else in this shader.
"constant uint _25=(uint(LOCAL_SIZE)+0u);\n"
"struct constBuffer\n"
"{\n"
" uint4 point;\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float4 data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" TYPE data[1];\n"
"};\n"
"struct histogram\n"
"{\n"
" uint data[1];\n"
"};\n"
// The three SPIRV_CROSS_CONSTANT_ID_* macros default to 1u and only feed the
// unused gl_WorkGroupSize constant below.
"#ifndef SPIRV_CROSS_CONSTANT_ID_0\n"
"#define SPIRV_CROSS_CONSTANT_ID_0 1u\n"
"#endif\n"
"constant uint _168=SPIRV_CROSS_CONSTANT_ID_0;\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_1\n"
"#define SPIRV_CROSS_CONSTANT_ID_1 1u\n"
"#endif\n"
"constant uint _169=SPIRV_CROSS_CONSTANT_ID_1;\n"
"#ifndef SPIRV_CROSS_CONSTANT_ID_2\n"
"#define SPIRV_CROSS_CONSTANT_ID_2 1u\n"
"#endif\n"
"constant uint _170=SPIRV_CROSS_CONSTANT_ID_2;\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(_168,_169,_170);\n"
"kernel void main0(device histogram& uHistogram [[buffer(0)]],const device sourceBuffer0& uAttr [[buffer(1)]],const device sourceBuffer1& uViewProj [[buffer(2)]],constant constBuffer& uConstant [[buffer(3)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
" uint binSize=0u;\n"
// Split the totalSize points evenly: each thread gets ceil(totalSize/threadNumber).
" uint threadNumber=groupSize*LOCAL_SIZE;\n"
" uint totalSize=uConstant.point.x;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
" for (uint pos=sta; pos<fin; pos++)\n"
" {\n"
" float4 vp0=uViewProj.data[0];\n"
" float4 vp1=uViewProj.data[1];\n"
" float4 vp2=uViewProj.data[2];\n"
" float4 vp3=uViewProj.data[3];\n"
" float4 attr=float4(uAttr.data[pos]);\n"
// depth = projected z divided by projected w, using attr.xyz and an implicit
// 1 for the fourth component.
" float depth=(((attr.x*vp0.z)+(attr.y*vp1.z))+(attr.z*vp2.z))+vp3.z;\n"
" float dw=(((attr.x*vp0.w)+(attr.y*vp1.w))+(attr.z*vp2.w))+vp3.w;\n"
" depth /= dw;\n"
" if ((depth >= 0.0) && (depth <= 1.0))\n"
" {\n"
" binSize++;\n"
" }\n"
" }\n"
" uHistogram.data[threadId]=binSize;\n"
"}\n"
;
// Metal compute kernel source: convert an int buffer to float with scale and offset.
// Bindings: buffer(0) = float output, buffer(1) = int input,
//           buffer(2) = constants { int4 size; float4 scale; }.
// For every thread index pos < size.x the kernel stores
//   out[pos] = float(in[pos]) * scale.x + scale.y.
// Only size.x, scale.x and scale.y are read.
const char* render_shader_int2float_metal = 
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 size;\n"
" float4 scale;\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" int data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device destBuffer& uOutput [[buffer(0)]],const device sourceBuffer0& uInput [[buffer(1)]],constant constBuffer& uConstant [[buffer(2)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 size=uConstant.size;\n"
// Threads past the element count do nothing.
" if (pos<size.x)\n"
" {\n"
" uOutput.data[pos]=(float(uInput.data[pos])*uConstant.scale.x)+uConstant.scale.y;\n"
" }\n"
"}\n"
;
// Metal compute kernel source: forward differences of an image along x and y.
// Bindings: buffer(0) = float dx output, buffer(1) = float dy output,
//           buffer(2) = float input, buffer(3) = constants { int4 size; }.
// One thread handles one element pos < size.x*size.y*size.z*size.w, decoded as
// (x, y, z, n) with x varying fastest. The storage index of an element is
//   n*size.x*size.y*size.z + y*size.x*size.z + x*size.z + z.
// Results:
//   dx = in(x+1, y) - in(x, y) when x < size.x - 1, otherwise 0
//   dy = in(x, y+1) - in(x, y) when y < size.y - 1, otherwise 0
const char* render_shader_dfdxdy_metal = 
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 size;\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(device destBuffer& uDx [[buffer(0)]],device destBuffer1& uDy [[buffer(1)]],const device sourceBuffer0& uInput [[buffer(2)]],constant constBuffer& uConstant [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 size=uConstant.size;\n"
" int total=((size.x*size.y)*size.z)*size.w;\n"
" if (pos<total)\n"
" {\n"
" int x=pos % size.x;\n"
" int tmp=pos/size.x;\n"
" int y=tmp % size.y;\n"
" tmp /= size.y;\n"
" int z=tmp % size.z;\n"
" int n=tmp/size.z;\n"
// outPos = this element, xDPos = neighbour at x+1, yDPos = neighbour at y+1.
// The neighbour indices are computed unconditionally but only dereferenced
// inside the bounds checks below.
" int outPos=(((((n*size.x)*size.y)*size.z)+(x*size.z))+((y*size.x)*size.z))+z;\n"
" int xDPos=(((((n*size.x)*size.y)*size.z)+((x+1)*size.z))+((y*size.x)*size.z))+z;\n"
" int yDPos=(((((n*size.x)*size.y)*size.z)+(x*size.z))+(((y+1)*size.x)*size.z))+z;\n"
" if (x<(size.x-1))\n"
" {\n"
" uDx.data[outPos]=uInput.data[xDPos]-uInput.data[outPos];\n"
" }\n"
" else\n"
" {\n"
" uDx.data[outPos]=0.0;\n"
" }\n"
" if (y<(size.y-1))\n"
" {\n"
" uDy.data[outPos]=uInput.data[yDPos]-uInput.data[outPos];\n"
" }\n"
" else\n"
" {\n"
" uDy.data[outPos]=0.0;\n"
" }\n"
" }\n"
"}\n"
;
// Metal compute kernel source: per-thread histogram of one radix digit, with
// the bin count and threadgroup size taken from macros.
// Bindings: buffer(0) = uint histogram output, buffer(1) = uint4 key input,
//           buffer(2) = { uint4 off; } (off.x = element count),
//           buffer(3) = { uint4 off; } (off.x = bit shift of the current digit).
// LOCAL_SIZE and BIN_NUMBER are not defined inside this string; they must be
// supplied when the shader is compiled (presumably as preprocessor macros).
// The input is read as uint4 elements and only the .x and .z components are
// used as keys, so (off.x + 1) / 2 elements are processed in total.
// The digit is (key >> shift) & (BIN_NUMBER - 1).
// NOTE(review): the mask form assumes BIN_NUMBER is a power of two - confirm.
// Thread t writes bin b to uHistogram[b * threadNumber + t].
const char* render_shader_radixsort_histogram_option_metal = 
"using namespace metal;\n"
// spvUnsafeArray: fixed-size array wrapper with operator[] overloads for each
// address space; no bounds checking is performed.
"template<typename T,size_t Num>\n"
"struct spvUnsafeArray\n"
"{\n"
" T elements[Num ? Num : 1];\n"
" \n"
" thread T& operator [] (size_t pos) thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const thread T& operator [] (size_t pos) const thread\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" device T& operator [] (size_t pos) device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const device T& operator [] (size_t pos) const device\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" constexpr const constant T& operator [] (size_t pos) const constant\n"
" {\n"
" return elements[pos];\n"
" }\n"
" \n"
" threadgroup T& operator [] (size_t pos) threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
" constexpr const threadgroup T& operator [] (size_t pos) const threadgroup\n"
" {\n"
" return elements[pos];\n"
" }\n"
"};\n"
"struct variableBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct variablepBuffer\n"
"{\n"
" uint4 off;\n"
"};\n"
"struct pointO\n"
"{\n"
" uint4 data[1];\n"
"};\n"
"struct pointI\n"
"{\n"
" uint data[1];\n"
"};\n"
"kernel void main0(device pointI& uHistogram [[buffer(0)]],const device pointO& uPointKeysInput [[buffer(1)]],constant variableBuffer& uOffset [[buffer(2)]],constant variablepBuffer& uPass [[buffer(3)]],uint3 gl_NumWorkGroups [[threadgroups_per_grid]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" uint groupSize=gl_NumWorkGroups.x;\n"
" uint threadId=gl_GlobalInvocationID.x;\n"
// Private per-thread bin counters, zeroed before counting.
" spvUnsafeArray<uint,BIN_NUMBER> binSize;\n"
" for (int i=0; i<BIN_NUMBER; i++)\n"
" {\n"
" binSize[i]=0u;\n"
" }\n"
// Each thread gets a contiguous slice of ceil(totalSize/threadNumber) uint4s.
" uint totalSize=(uOffset.off.x+1u)/2u;\n"
" uint threadNumber=groupSize*LOCAL_SIZE;\n"
" uint size=((totalSize+threadNumber)-1u)/threadNumber;\n"
" uint sta=threadId*size;\n"
" uint fin=min((sta+size),totalSize);\n"
" uint div=uPass.off.x;\n"
" uint modNum=BIN_NUMBER-1u;\n"
" for (uint i_1=sta; i_1<fin; i_1++)\n"
" {\n"
" uint2 key=(uPointKeysInput.data[i_1].xz >> uint2(div)) & uint2(modNum);\n"
" binSize[key.x]++;\n"
" binSize[key.y]++;\n"
" }\n"
// Store bin-major: all threads' counts for bin 0, then bin 1, and so on.
" for (int i_2=0; i_2<BIN_NUMBER; i_2++)\n"
" {\n"
" uHistogram.data[(uint(i_2)*threadNumber)+threadId]=binSize[i_2];\n"
" }\n"
"}\n"
;
// Metal compute kernel source: combine dx and dy buffers into one value per
// input element (the buffers named uDx/uDy are read-only here; uInput is
// written).
// Bindings: buffer(0) = float dx (read), buffer(1) = float dy (read),
//           buffer(2) = float output "uInput", buffer(3) = constants { int4 size; }.
// One thread handles one element pos < size.x*size.y*size.z*size.w, decoded as
// (x, y, z, n) with x varying fastest. The storage index of an element is
//   n*size.x*size.y*size.z + y*size.x*size.z + x*size.z + z.
// Result:
//   out  = 0
//   out += dx(x-1, y) - dx(x, y)   when x > 0
//   out += dy(x, y-1) - dy(x, y)   when y > 0
// NOTE(review): for the forward-difference kernel in this file
// (dx = in(x+1) - in(x) for x < size.x-1, else 0) the exact adjoint would
// contribute -dx(0, y) at x == 0 and omit -dx(x, y) at x == size.x-1 (same for
// y); this kernel instead contributes nothing at x == 0 / y == 0. Confirm
// whether the boundary behaviour is intentional.
const char* render_shader_dfdxdygrad_metal = 
"using namespace metal;\n"
"struct constBuffer\n"
"{\n"
" int4 size;\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
"kernel void main0(const device destBuffer& uDx [[buffer(0)]],const device destBuffer1& uDy [[buffer(1)]],device sourceBuffer0& uInput [[buffer(2)]],constant constBuffer& uConstant [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 size=uConstant.size;\n"
" int total=((size.x*size.y)*size.z)*size.w;\n"
" if (pos<total)\n"
" {\n"
" int x=pos % size.x;\n"
" int tmp=pos/size.x;\n"
" int y=tmp % size.y;\n"
" tmp /= size.y;\n"
" int z=tmp % size.z;\n"
" int n=tmp/size.z;\n"
// outPos = this element, xDPos = neighbour at x-1, yDPos = neighbour at y-1.
// The neighbour indices are computed unconditionally but only dereferenced
// inside the x > 0 / y > 0 checks below.
" int outPos=(((((n*size.x)*size.y)*size.z)+(x*size.z))+((y*size.x)*size.z))+z;\n"
" int xDPos=(((((n*size.x)*size.y)*size.z)+((x-1)*size.z))+((y*size.x)*size.z))+z;\n"
" int yDPos=(((((n*size.x)*size.y)*size.z)+(x*size.z))+(((y-1)*size.x)*size.z))+z;\n"
" float summer=0.0;\n"
" if (x>0)\n"
" {\n"
" summer=(summer+uDx.data[xDPos])-uDx.data[outPos];\n"
" }\n"
" if (y>0)\n"
" {\n"
" summer=(summer+uDy.data[yDPos])-uDy.data[outPos];\n"
" }\n"
" uInput.data[outPos]=summer;\n"
" }\n"
"}\n"
;
// Metal compute kernel source: bilinear lookup into a 2D image at coordinates
// supplied by a grid buffer.
// Bindings: buffer(0) = float output, buffer(1) = float input image,
//           buffer(2) = float grid coordinates,
//           buffer(3) = constants { int4 inShape; int4 outShape; uint alignCorners; }.
// One thread handles one element pos < outShape.x*outShape.y*outShape.z*outShape.w.
// The grid coordinate is scaled by the input extent and shifted by -0.5; the
// four surrounding texels are fetched with their positions clamped to the
// image bounds and blended with bilinear weights.
// alignCorners is declared in the constant struct but never read by the shader.
const char* render_shader_texture2d_metal = 
"using namespace metal;\n"
"struct gridSampleBuffer\n"
"{\n"
" int4 inShape;\n"
" int4 outShape;\n"
" uint alignCorners;\n"
"};\n"
"struct sourceBuffer0\n"
"{\n"
" float data[1];\n"
"};\n"
"struct sourceBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
// LoadSample: clamps (positionX, positionY) into [0,width-1] x [0,height-1]
// (width = inShape.x, height = inShape.y) and returns the input value at
//   positionX*inShape.z + positionY*width*inShape.z + n*width*height*inShape.z + c.
"static inline __attribute__((always_inline))\n"
"float LoadSample(thread int& positionX,thread int& positionY,thread const int& c,thread const int& n,constant gridSampleBuffer& uGridSampleParam,const device sourceBuffer0& uInput)\n"
"{\n"
" int width=uGridSampleParam.inShape.x;\n"
" int height=uGridSampleParam.inShape.y;\n"
" positionX=clamp(positionX,0,width-1);\n"
" positionY=clamp(positionY,0,height-1);\n"
" float V=uInput.data[(((0+(positionX*uGridSampleParam.inShape.z))+((positionY*width)*uGridSampleParam.inShape.z))+(((n*width)*height)*uGridSampleParam.inShape.z))+c];\n"
" return V;\n"
"}\n"
"kernel void main0(device destBuffer& uOutput [[buffer(0)]],const device sourceBuffer0& uInput [[buffer(1)]],const device sourceBuffer1& uGrid [[buffer(2)]],constant gridSampleBuffer& uGridSampleParam [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 inputShape=uGridSampleParam.inShape;\n"
" int4 outputShape=uGridSampleParam.outShape;\n"
" int total=((outputShape.x*outputShape.y)*outputShape.z)*outputShape.w;\n"
" if (pos<total)\n"
" {\n"
// Decompose the flat thread index into (x, y, z, n), x varying fastest.
" int x=pos % outputShape.x;\n"
" int tmp=pos/outputShape.x;\n"
" int y=tmp % outputShape.y;\n"
" tmp /= outputShape.y;\n"
" int z=tmp % outputShape.z;\n"
" int n=tmp/outputShape.z;\n"
// The grid is indexed per (n, y, x) with a stride of inShape.w floats per
// position; components 0 and 1 are the X and Y coordinates.
" int gridPosition=(((n*outputShape.x)*outputShape.y)+(y*outputShape.x))+x;\n"
" float gridX=uGrid.data[(inputShape.w*gridPosition)+0];\n"
" float gridY=uGrid.data[(inputShape.w*gridPosition)+1];\n"
" float cordH=(gridY*float(inputShape.y))-0.5;\n"
" float cordW=(gridX*float(inputShape.x))-0.5;\n"
" int w0_h=int(floor(cordH));\n"
" int w0_w=int(floor(cordW));\n"
" int w1_h=w0_h+1;\n"
" int w1_w=w0_w+1;\n"
" float oneV=1.0;\n"
// Four clamped loads: i00=(w0_w,w0_h), i01=(w1_w,w0_h), i10=(w0_w,w1_h),
// i11=(w1_w,w1_h).
" int param=w0_w;\n"
" int param_1=w0_h;\n"
" int param_2=z;\n"
" int param_3=n;\n"
" float _215=LoadSample(param,param_1,param_2,param_3,uGridSampleParam,uInput);\n"
" float i00=_215;\n"
" int param_4=w1_w;\n"
" int param_5=w0_h;\n"
" int param_6=z;\n"
" int param_7=n;\n"
" float _225=LoadSample(param_4,param_5,param_6,param_7,uGridSampleParam,uInput);\n"
" float i01=_225;\n"
" int param_8=w0_w;\n"
" int param_9=w1_h;\n"
" int param_10=z;\n"
" int param_11=n;\n"
" float _235=LoadSample(param_8,param_9,param_10,param_11,uGridSampleParam,uInput);\n"
" float i10=_235;\n"
" int param_12=w1_w;\n"
" int param_13=w1_h;\n"
" int param_14=z;\n"
" int param_15=n;\n"
" float _245=LoadSample(param_12,param_13,param_14,param_15,uGridSampleParam,uInput);\n"
" float i11=_245;\n"
// Blend horizontally with f0/f1, then vertically with h0/h1.
" float f0=float(w1_w)-cordW;\n"
" float f1=oneV-f0;\n"
" float h0=float(w1_h)-cordH;\n"
" float h1=oneV-h0;\n"
" float i0=(i00*f0)+(i01*f1);\n"
" float i1=(i10*f0)+(i11*f1);\n"
" float V=(i0*h0)+(i1*h1);\n"
" uOutput.data[(((0+(x*outputShape.z))+((y*outputShape.x)*outputShape.z))+z)+(((n*outputShape.x)*outputShape.y)*outputShape.z)]=V;\n"
" }\n"
"}\n"
;
// Metal Shading Language source for the "texturecubegrad" compute kernel,
// stored as a single C string assembled from adjacent literals (one shader
// line per literal). The comments in this definition live between the
// literals, so they are not part of the shader text.
//
// What the shader does, as far as this text shows: for every element of
// uOutput it reads a 3-component direction from uGrid, maps it to a cube face
// and an (s, t) position on that face, and atomically adds the element's value,
// weighted over the four neighbouring texels, into the integer buffer uInput.
// NOTE(review): identifiers such as _50 / param_4 suggest the text is
// machine-generated — confirm before hand-editing the literals.
const char* render_shader_texturecubegrad_metal =
"using namespace metal;\n"
// Parameter block, bound at buffer(3) in main0.
// inShape.x / .y are used as width / height, .z as the innermost stride and
// .w as the per-position stride of uGrid; outShape.x/.y/.z/.w multiply to the
// number of work items. alignCorners is declared but never read in this shader.
"struct gridSampleBuffer\n"
"{\n"
" int4 inShape;\n"
" int4 outShape;\n"
" uint alignCorners;\n"
"};\n"
// Integer accumulation buffer (uInput, buffer(1)); indexed past data[1], so
// the [1] extent is only a placeholder for a runtime-sized array.
"struct sourceBuffer0\n"
"{\n"
" int data[1];\n"
"};\n"
// Float buffer holding the sampling directions (uGrid, buffer(2)).
"struct sourceBuffer1\n"
"{\n"
" float data[1];\n"
"};\n"
// Float buffer bound as uOutput (buffer(0)); this kernel only reads it.
"struct destBuffer\n"
"{\n"
" float data[1];\n"
"};\n"
"constant uint3 gl_WorkGroupSize [[maybe_unused]]=uint3(256u,1u,1u);\n"
// indexCubeMap: choose a cube face from direction d and compute the position
// (s, t) on that face.
//   d    - input direction.
//   face - out: 0/1 when |x| is largest (d.x > 0 / otherwise), 2/3 for |y|,
//          4/5 for |z|; -1 when no face is selected or ma == 0.
//   s, t - out: ((sc / ma) + 1) * 0.5 for the selected face, or 0 when ma == 0.
// NOTE(review): the sc/tc/ma assignments look like the OpenGL cube-map face
// selection table — confirm against the spec.
"static inline __attribute__((always_inline))\n"
"void indexCubeMap(thread const float3& d,thread int& face,thread float& s,thread float& t)\n"
"{\n"
" float3 absd;\n"
" absd.x=abs(d.x);\n"
" absd.y=abs(d.y);\n"
" absd.z=abs(d.z);\n"
" face=-1;\n"
// _58 is the expanded form of (absd.x >= absd.y && absd.x >= absd.z).
" bool _50=absd.x >= absd.y;\n"
" bool _58;\n"
" if (_50)\n"
" {\n"
" _58=absd.x >= absd.z;\n"
" }\n"
" else\n"
" {\n"
" _58=_50;\n"
" }\n"
// NOTE(review): sc, tc and ma have no initializer. If none of the three axis
// branches below runs (presumably only possible with NaN components — confirm),
// the `ma == 0.0` test reads an indeterminate value; face is -1 either way in
// that case, and main0 ignores s/t when face < 0.
" float sc;\n"
" float tc;\n"
" float ma;\n"
// X is a largest-magnitude axis: face 0 for d.x > 0, face 1 otherwise.
" if (_58)\n"
" {\n"
" if (d.x>0.0)\n"
" {\n"
" face=0;\n"
" sc=-d.z;\n"
" tc=-d.y;\n"
" ma=absd.x;\n"
" }\n"
" else\n"
" {\n"
" face=1;\n"
" sc=d.z;\n"
" tc=-d.y;\n"
" ma=absd.x;\n"
" }\n"
" }\n"
// The Y and Z tests are independent `if`s, not `else if`: when magnitudes tie,
// a later axis overwrites the face chosen by an earlier one.
// _100 is the expanded form of (absd.y >= absd.x && absd.y >= absd.z).
" bool _92=absd.y >= absd.x;\n"
" bool _100;\n"
" if (_92)\n"
" {\n"
" _100=absd.y >= absd.z;\n"
" }\n"
" else\n"
" {\n"
" _100=_92;\n"
" }\n"
// Y is a largest-magnitude axis: face 2 for d.y > 0, face 3 otherwise.
" if (_100)\n"
" {\n"
" if (d.y>0.0)\n"
" {\n"
" face=2;\n"
" sc=d.x;\n"
" tc=d.z;\n"
" ma=absd.y;\n"
" }\n"
" else\n"
" {\n"
" face=3;\n"
" sc=d.x;\n"
" tc=-d.z;\n"
" ma=absd.y;\n"
" }\n"
" }\n"
// _136 is the expanded form of (absd.z >= absd.x && absd.z >= absd.y).
" bool _128=absd.z >= absd.x;\n"
" bool _136;\n"
" if (_128)\n"
" {\n"
" _136=absd.z >= absd.y;\n"
" }\n"
" else\n"
" {\n"
" _136=_128;\n"
" }\n"
// Z is a largest-magnitude axis: face 4 for d.z > 0, face 5 otherwise.
" if (_136)\n"
" {\n"
" if (d.z>0.0)\n"
" {\n"
" face=4;\n"
" sc=d.x;\n"
" tc=-d.y;\n"
" ma=absd.z;\n"
" }\n"
" else\n"
" {\n"
" face=5;\n"
" sc=-d.x;\n"
" tc=-d.y;\n"
" ma=absd.z;\n"
" }\n"
" }\n"
// A zero major-axis magnitude (all components zero) yields no face, which
// also avoids the division below.
" if (ma == 0.0)\n"
" {\n"
" s=0.0;\n"
" t=0.0;\n"
" face=-1;\n"
" }\n"
" else\n"
" {\n"
// Map sc/ma and tc/ma from [-1, 1] to [0, 1].
" s=((sc/ma)+1.0)*0.5;\n"
" t=((tc/ma)+1.0)*0.5;\n"
" }\n"
"}\n"
// WriteSample: atomically add V_f, converted to an int after scaling by
// 16777216 (2^24), into uInput at texel (positionX, positionY), channel c,
// image index n.
//   positionX / positionY - clamped in place to [0, width-1] / [0, height-1],
//                           so out-of-range neighbours land on the border texel.
//   Element index = positionX*inShape.z + positionY*width*inShape.z
//                   + n*width*height*inShape.z + c.
// The value returned by the atomic add (_232) is not used.
"static inline __attribute__((always_inline))\n"
"void WriteSample(thread int& positionX,thread int& positionY,thread const int& c,thread const int& n,thread const float& V_f,constant gridSampleBuffer& uGridSampleParam,device sourceBuffer0& uInput)\n"
"{\n"
" int V=int(V_f*16777216.0);\n"
" int width=uGridSampleParam.inShape.x;\n"
" int height=uGridSampleParam.inShape.y;\n"
" positionX=clamp(positionX,0,width-1);\n"
" positionY=clamp(positionY,0,height-1);\n"
" int _232=atomic_fetch_add_explicit((device atomic_int*)&uInput.data[(((0+(positionX*uGridSampleParam.inShape.z))+((positionY*width)*uGridSampleParam.inShape.z))+(((n*width)*height)*uGridSampleParam.inShape.z))+c],V,memory_order_relaxed);\n"
"}\n"
// main0: kernel entry point; each thread handles the element whose linear
// index is gl_GlobalInvocationID.x.
//   buffer(0) uOutput          - float values, read only here.
//   buffer(1) uInput           - int buffer receiving the atomic adds.
//   buffer(2) uGrid            - float directions, inShape.w floats per position.
//   buffer(3) uGridSampleParam - shapes described on gridSampleBuffer.
"kernel void main0(const device destBuffer& uOutput [[buffer(0)]],device sourceBuffer0& uInput [[buffer(1)]],const device sourceBuffer1& uGrid [[buffer(2)]],constant gridSampleBuffer& uGridSampleParam [[buffer(3)]],uint3 gl_GlobalInvocationID [[thread_position_in_grid]])\n"
"{\n"
" int pos=int(gl_GlobalInvocationID.x);\n"
" int4 inputShape=uGridSampleParam.inShape;\n"
" int4 outputShape=uGridSampleParam.outShape;\n"
" int total=((outputShape.x*outputShape.y)*outputShape.z)*outputShape.w;\n"
// Threads past the element count do nothing.
" if (pos<total)\n"
" {\n"
// Split pos into (x, y, z, on) with x varying fastest.
" int x=pos % outputShape.x;\n"
" int tmp=pos/outputShape.x;\n"
" int y=tmp % outputShape.y;\n"
" tmp /= outputShape.y;\n"
" int z=tmp % outputShape.z;\n"
" int on=tmp/outputShape.z;\n"
// The grid lookup depends on (on, y, x) only; z does not select a direction.
" int gridPosition=(((on*outputShape.x)*outputShape.y)+(y*outputShape.x))+x;\n"
" float u=uGrid.data[(inputShape.w*gridPosition)+0];\n"
" float v=uGrid.data[(inputShape.w*gridPosition)+1];\n"
" float w=uGrid.data[(inputShape.w*gridPosition)+2];\n"
" float3 param=float3(u,v,w);\n"
" int param_1;\n"
" float param_2;\n"
" float param_3;\n"
" indexCubeMap(param,param_1,param_2,param_3);\n"
" int face=param_1;\n"
" float gridX=param_2;\n"
" float gridY=param_3;\n"
// Value to distribute, read at x*outZ + y*outX*outZ + z + on*outX*outY*outZ.
" float V=uOutput.data[(((0+(x*outputShape.z))+((y*outputShape.x)*outputShape.z))+z)+(((on*outputShape.x)*outputShape.y)*outputShape.z)];\n"
// Nothing is written when indexCubeMap selected no face.
" if (face >= 0)\n"
" {\n"
// Six consecutive image slots per `on`, one per cube face.
" int n=(on*6)+face;\n"
// Scale (gridX, gridY) by the input width/height and shift by -0.5, then take
// the floor texel (w0_*) and its +1 neighbour (w1_*).
" float cordH=(gridY*float(inputShape.y))-0.5;\n"
" float cordW=(gridX*float(inputShape.x))-0.5;\n"
" int w0_h=int(floor(cordH));\n"
" int w0_w=int(floor(cordW));\n"
" int w1_h=w0_h+1;\n"
" int w1_w=w0_w+1;\n"
// f0/h0 weight the floor texel, f1/h1 the +1 texel; the four products below
// sum to V.
" float f0=float(w1_w)-cordW;\n"
" float f1=1.0-f0;\n"
" float h0=float(w1_h)-cordH;\n"
" float h1=1.0-h0;\n"
" float f00=(f0*h0)*V;\n"
" float f01=(f1*h0)*V;\n"
" float f10=(f0*h1)*V;\n"
" float f11=(f1*h1)*V;\n"
// Texel (w0_w, w0_h).
" int param_4=w0_w;\n"
" int param_5=w0_h;\n"
" int param_6=z;\n"
" int param_7=n;\n"
" float param_8=f00;\n"
" WriteSample(param_4,param_5,param_6,param_7,param_8,uGridSampleParam,uInput);\n"
// Texel (w1_w, w0_h).
" int param_9=w1_w;\n"
" int param_10=w0_h;\n"
" int param_11=z;\n"
" int param_12=n;\n"
" float param_13=f01;\n"
" WriteSample(param_9,param_10,param_11,param_12,param_13,uGridSampleParam,uInput);\n"
// Texel (w0_w, w1_h).
" int param_14=w0_w;\n"
" int param_15=w1_h;\n"
" int param_16=z;\n"
" int param_17=n;\n"
" float param_18=f10;\n"
" WriteSample(param_14,param_15,param_16,param_17,param_18,uGridSampleParam,uInput);\n"
// Texel (w1_w, w1_h).
" int param_19=w1_w;\n"
" int param_20=w1_h;\n"
" int param_21=z;\n"
" int param_22=n;\n"
" float param_23=f11;\n"
" WriteSample(param_19,param_20,param_21,param_22,param_23,uGridSampleParam,uInput);\n"
" }\n"
" }\n"
"}\n"
;