in crates/ratchet-core/src/tensor.rs [795:859]
/// Compiles every unresolved tensor in this tensor's execution order and dispatches the
/// resulting program on `gpu_device`.
///
/// The work happens in three phases:
/// 1. A pass is begun on the device and buffers for the whole execution order are obtained
///    through `allocate_cfg`.
/// 2. Each tensor that is not yet resolved receives its allocated buffer as GPU storage and
///    has its op compiled; uniforms are accumulated in a single `CpuUniform`.
/// 3. The compiled ops are wrapped in an `Executable`, dispatched, and the device is polled
///    with `WaitForSubmissionIndex` for the returned submission index.
///
/// `debug` is always forwarded to `compile_gpu`. It selects `dispatch_debugging` over
/// `dispatch` only when the crate is built with the `debug` feature; without that feature
/// the regular `dispatch` path is always taken.
///
/// # Errors
/// - `TensorError::NoStorage(id)` when no allocation exists for an unresolved tensor.
/// - Any error propagated from `allocate_cfg` or from uploading the uniform buffer
///   (`into_gpu`).
///
/// # Panics
/// - If a tensor in the execution order is not on a GPU device.
/// - If dispatching the executable fails (the result is unwrapped).
/// - With the `plotting` feature: if the execution order is empty or rendering fails.
fn resolve_gpu(self, gpu_device: &WgpuDevice, debug: bool) -> Result<Tensor, TensorError> {
    let execution_order = self.execution_order();
    let mut uniform = CpuUniform::new();
    let mut compiled_ops = Vec::with_capacity(execution_order.len());

    gpu_device.begin_pass();
    let mut allocations = gpu_device.allocate_cfg(&execution_order, gpu_device)?;

    #[cfg(feature = "plotting")]
    crate::plot::render_to_file(execution_order.last().unwrap(), "prealloc.svg").unwrap();

    // Destination tensors of the compiled ops, only tracked for debug dispatch.
    #[cfg(feature = "debug")]
    let mut compute_dsts = Vec::new();

    for t in execution_order.iter() {
        log::debug!("Compiling: {:?}", t.op().name());
        assert!(t.device().is_gpu());
        if t.resolved() {
            continue;
        }

        // Hand the pre-planned buffer to the tensor before compiling its op.
        let id = t.id();
        let inner = allocations.remove(&id).ok_or(TensorError::NoStorage(id))?;
        t.update_storage(Storage::GPU(GPUBuffer {
            inner,
            alignment: t.dt().size_of(),
        }));

        // In-place execution is only requested when the op supports it and its first source
        // has a strong count of exactly one. An op without sources can never run in place;
        // the checked lookup avoids an out-of-bounds panic for such ops.
        let can_inplace = t.op().supports_inplace()
            && t.op()
                .srcs()
                .first()
                .map_or(false, |to_modify| to_modify.strong_count() == 1);

        if let Some(compiled_op) = t.compile_gpu(&mut uniform, gpu_device, can_inplace, debug) {
            compiled_ops.push(compiled_op);
            #[cfg(feature = "debug")]
            compute_dsts.push(*t);
        } else {
            log::warn!("Compilation failed for operation: {:?}", t.op().name());
        }
    }

    #[cfg(feature = "plotting")]
    crate::plot::render_to_file(execution_order.last().unwrap(), "alloc.svg").unwrap();

    let executable = Executable::new(
        compiled_ops,
        uniform.into_gpu(gpu_device)?,
        #[cfg(feature = "debug")]
        compute_dsts,
    );

    // This statement is only compiled with the `debug` feature, so no additional
    // `cfg!(feature = "debug")` check is needed to guard `dispatch_debugging`.
    #[cfg(feature = "debug")]
    let index = if debug {
        executable.dispatch_debugging(gpu_device).unwrap()
    } else {
        executable.dispatch(gpu_device).unwrap()
    };
    #[cfg(not(feature = "debug"))]
    let index = executable.dispatch(gpu_device).unwrap();

    gpu_device.poll(wgpu::MaintainBase::WaitForSubmissionIndex(index));
    Ok(self)
}