Summary: 9 instances, 3 unique

Text | Count
// TODO: do experiments on choice of work group size | 4
TORCH_CHECK(Ms_val.dtype() == torch::kDouble); // TODO: add support for float | 4
// TODO: set stream according to torch | 1