in theseus/extlib/cusolver_lu_solver.cpp [281:304]
// Runs cusolverRfBatchSolve on `b` using the handle `cusolverRfH` and the
// permutation tensors `P` and `Q` held by this object.
//
// `b` must be a contiguous CUDA tensor of doubles with shape
// (factoredBatchSize, numRows). Row i of `b` is handed to cuSOLVER as the
// right-hand side of batch item i; per the cuSOLVER Rf documentation the
// XF_array argument is in/out, so the solutions are expected to overwrite `b`.
//
// Raises (via TORCH_CHECK / CUSOLVER_CHECK) if `b` does not satisfy the
// requirements above or if the cuSOLVER call reports an error.
void CusolverLUSolver::solve(const torch::Tensor& b) {
  TORCH_CHECK(b.device().is_cuda());
  TORCH_CHECK(b.dim() == 2);
  TORCH_CHECK(b.size(0) == factoredBatchSize);
  TORCH_CHECK(b.size(1) == numRows);
  TORCH_CHECK(b.scalar_type() == torch::kDouble,
              "CusolverLUSolver::solve expects a tensor of doubles");
  // The per-batch pointers below are computed as base + numRows * i, which is
  // only valid for a dense row-major buffer. The solve is done through those
  // raw pointers, so a silent `.contiguous()` copy would not be seen by the
  // caller; reject non-contiguous input instead.
  TORCH_CHECK(b.is_contiguous(),
              "CusolverLUSolver::solve expects a contiguous tensor");

  const int64_t batchSize = static_cast<int64_t>(factoredBatchSize);
  const int64_t rowLen = static_cast<int64_t>(numRows);

  // Host-side table of device pointers, one per batch item. It is stored in a
  // byte tensor so that it can be transferred with the usual tensor copy.
  at::Tensor b_array_cpu = torch::empty(
      {batchSize * static_cast<int64_t>(sizeof(double*))},
      torch::TensorOptions(torch::kByte));
  double* pB = b.data_ptr<double>();
  double** pB_array_cpu =
      reinterpret_cast<double**>(b_array_cpu.data_ptr<uint8_t>());
  for (int64_t i = 0; i < batchSize; ++i) {
    // 64-bit offset: numRows * i could overflow a 32-bit int for big batches.
    pB_array_cpu[i] = pB + rowLen * i;
  }

  // Place the pointer table on the device that actually holds `b`, rather
  // than on whichever CUDA device happens to be current.
  at::Tensor b_array = b_array_cpu.to(b.device());

  // Scratch buffer for cusolverRfBatchSolve: 2 * batchSize * (n * nrhs)
  // doubles, with nrhs == 1.
  at::Tensor temp = torch::empty(
      {rowLen * 2 * batchSize},
      torch::TensorOptions(torch::kDouble).device(A_rowPtr.device()));

  CUSOLVER_CHECK(cusolverRfBatchSolve(
      cusolverRfH,
      P.data_ptr<int>(),
      Q.data_ptr<int>(),
      1,  // nrhs
      temp.data_ptr<double>(),
      numRows,
      reinterpret_cast<double**>(b_array.data_ptr<uint8_t>()),
      numRows));
}