const failable evalKernel()

in modules/opencl/eval.hpp [610:693]


const failable<value> evalKernel(const failable<OpenCLKernel>& fkernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl) {

#ifdef WANT_MAINTAINER_OPENCL_PROF
    const cl_uint estart = (cl_uint)timens();
    const cl_uint pstart = estart;
#endif

    if (!hasContent(fkernel))
        return mkfailure<value>(fkernel);
    const OpenCLKernel kernel = content(fkernel);

    // Get a command queue for the specified device type
    const cl_command_queue cq = commandq(cl);

    // Set the kernel input args
    const failable<list<OpenCLBuffer>> args = valuesToKernelArgs(cdr<value>(expr), kernel, cl, cq);
    if (!hasContent(args)) {
        return mkfailure<value>(args);
    }

    // Allocate result buffer in device memory
    const value fn = car<value>(expr);
    const OpenCLResultType rtype = kernelResultType(fn, type, n);
    const size_t rsize = rtype.n * rtype.size;
    const failable<OpenCLBuffer> rbuf = writeOnlyBuffer(rsize, cl);
    if (!hasContent(rbuf))
        return mkfailure<value>(rbuf);

    // Set it as a kernel output arg
    const cl_mem rmem = content(rbuf).mem;
    const failable<OpenCLBuffer> rarg = valueToKernelArg((cl_uint)length(cdr<value>(expr)), sizeof(cl_mem), &rmem, rbuf, kernel);
    if (!hasContent(rarg))
        return mkfailure<value>(rarg);

    // Enqueue the kernel, to be executed after all the writes complete
    cl_event wevt[32];
    const cl_uint nwevt = writeBufferEvents(content(args), wevt);
    cl_event kevt;
    const cl_int qerr = clEnqueueNDRangeKernel(cq, kernel.k, 1, NULL, &gwsize, NULL, nwevt, nwevt != 0? wevt : NULL, &kevt);
    if (qerr != CL_SUCCESS)
        return mkfailure<value>(string("Couldn't enqueue kernel task: ") + clError(qerr));

    // Enqueue result buffer read, to be executed after the kernel completes
    char res[rsize];
    cl_event revt;
    const cl_int rerr = clEnqueueReadBuffer(cq, rmem, CL_FALSE, 0, rsize, res, 1, &kevt, &revt);  
    if (rerr != CL_SUCCESS) {
        clReleaseEvent(kevt);
        return mkfailure<value>(string("Couldn't read from OpenCL device memory: ") + clError(rerr));
    }

#ifdef WANT_MAINTAINER_OPENCL_PROF
    const cl_uint pend = (cl_uint)timens();
    preptime += (pend - pstart);
#endif

    // Wait for completion
    const cl_int werr = clWaitForEvents(1, &revt);
    if (werr != CL_SUCCESS) {
        clReleaseEvent(revt);
        clReleaseEvent(kevt);
        return mkfailure<value>(string("Couldn't wait for kernel completion: ") + clError(werr));
    }

#ifdef WANT_MAINTAINER_OPENCL_PROF
    profileMemEvents(nwevt, wevt);
    profileKernelEvent(kevt);
    profileMemEvent(revt);
#endif

    // Convert the result to a value
    const value v = kernelResultToValue(res, rtype.type);

    // Release OpenCL resources
    clReleaseEvent(revt);
    clReleaseEvent(kevt);

#ifdef WANT_MAINTAINER_OPENCL_PROF
    const cl_uint eend = (cl_uint)timens();
    evaltime += (eend - estart);
#endif

    return v;
}