in tensorflow_time_two/cc/kernels/time_two_kernels.cu.cc [40:49]
// Enqueues TimeTwoCudaKernel<T> on the CUDA stream owned by `d`.
//
// Args:
//   d:    GPU device; the kernel is launched on d.stream().
//   size: forwarded to the kernel as its first argument (presumably the
//         number of elements in `in`/`out` -- confirm against the kernel).
//   in:   forwarded to the kernel as its second argument.
//   out:  forwarded to the kernel as its third argument.
//
// No dynamic shared memory is requested (third launch parameter is 0).
void operator()(const GPUDevice& d, int size, const T* in, T* out) {
  // Nothing to process: avoid enqueueing a launch for an empty input.
  // NOTE(review): assumes the kernel has no effect when size <= 0 --
  // confirm against TimeTwoCudaKernel.
  if (size <= 0) {
    return;
  }

  // Launch the cuda kernel.
  //
  // See core/util/cuda_kernel_helper.h for example of computing
  // block count and thread_per_block count.
  const int block_count = 1024;
  // Keep the block size a multiple of the 32-lane warp width. The previous
  // value of 20 filled only part of a single warp per block, leaving 12 of
  // its 32 lanes idle.
  // NOTE(review): assumes TimeTwoCudaKernel derives its indexing from
  // blockDim/gridDim and guards against indices >= size, rather than relying
  // on exactly 20 threads per block -- confirm against the kernel.
  const int thread_per_block = 256;

  TimeTwoCudaKernel<T>
      <<<block_count, thread_per_block, 0, d.stream()>>>(size, in, out);
}