in modules/opencl/eval.hpp [610:693]
const failable<value> evalKernel(const failable<OpenCLKernel>& fkernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl) {
#ifdef WANT_MAINTAINER_OPENCL_PROF
const cl_uint estart = (cl_uint)timens();
const cl_uint pstart = estart;
#endif
if (!hasContent(fkernel))
return mkfailure<value>(fkernel);
const OpenCLKernel kernel = content(fkernel);
// Get a command queue for the specified device type
const cl_command_queue cq = commandq(cl);
// Set the kernel input args
const failable<list<OpenCLBuffer>> args = valuesToKernelArgs(cdr<value>(expr), kernel, cl, cq);
if (!hasContent(args)) {
return mkfailure<value>(args);
}
// Allocate result buffer in device memory
const value fn = car<value>(expr);
const OpenCLResultType rtype = kernelResultType(fn, type, n);
const size_t rsize = rtype.n * rtype.size;
const failable<OpenCLBuffer> rbuf = writeOnlyBuffer(rsize, cl);
if (!hasContent(rbuf))
return mkfailure<value>(rbuf);
// Set it as a kernel output arg
const cl_mem rmem = content(rbuf).mem;
const failable<OpenCLBuffer> rarg = valueToKernelArg((cl_uint)length(cdr<value>(expr)), sizeof(cl_mem), &rmem, rbuf, kernel);
if (!hasContent(rarg))
return mkfailure<value>(rarg);
// Enqueue the kernel, to be executed after all the writes complete
cl_event wevt[32];
const cl_uint nwevt = writeBufferEvents(content(args), wevt);
cl_event kevt;
const cl_int qerr = clEnqueueNDRangeKernel(cq, kernel.k, 1, NULL, &gwsize, NULL, nwevt, nwevt != 0? wevt : NULL, &kevt);
if (qerr != CL_SUCCESS)
return mkfailure<value>(string("Couldn't enqueue kernel task: ") + clError(qerr));
// Enqueue result buffer read, to be executed after the kernel completes
char res[rsize];
cl_event revt;
const cl_int rerr = clEnqueueReadBuffer(cq, rmem, CL_FALSE, 0, rsize, res, 1, &kevt, &revt);
if (rerr != CL_SUCCESS) {
clReleaseEvent(kevt);
return mkfailure<value>(string("Couldn't read from OpenCL device memory: ") + clError(rerr));
}
#ifdef WANT_MAINTAINER_OPENCL_PROF
const cl_uint pend = (cl_uint)timens();
preptime += (pend - pstart);
#endif
// Wait for completion
const cl_int werr = clWaitForEvents(1, &revt);
if (werr != CL_SUCCESS) {
clReleaseEvent(revt);
clReleaseEvent(kevt);
return mkfailure<value>(string("Couldn't wait for kernel completion: ") + clError(werr));
}
#ifdef WANT_MAINTAINER_OPENCL_PROF
profileMemEvents(nwevt, wevt);
profileKernelEvent(kevt);
profileMemEvent(revt);
#endif
// Convert the result to a value
const value v = kernelResultToValue(res, rtype.type);
// Release OpenCL resources
clReleaseEvent(revt);
clReleaseEvent(kevt);
#ifdef WANT_MAINTAINER_OPENCL_PROF
const cl_uint eend = (cl_uint)timens();
evaltime += (eend - estart);
#endif
return v;
}