in bitsandbytes/functional.py [0:0]
def get_transform_buffer(shape, dtype, device, to_order, from_order="row", transpose=False):
# init_func = torch.empty
init_func = torch.zeros
dims = len(shape)
if dims == 2:
rows = shape[0]
elif dims == 3:
rows = shape[0] * shape[1]
cols = shape[-1]
state = (shape, to_order)
if transpose:
# swap dims
tmp = rows
rows = cols
cols = tmp
state = (shape[::-1], to_order)
if to_order == "row" or to_order == "col":
return init_func(shape, dtype=dtype, device=device), state
elif to_order == "col32":
# blocks of 32 columns (padded)
cols = 32 * ((cols + 31) // 32)
return init_func((rows, cols), dtype=dtype, device=device), state
elif to_order == "col_turing":
# blocks of 32 columns and 8 rows
cols = 32 * ((cols + 31) // 32)
rows = 8 * ((rows + 7) // 8)
return init_func((rows, cols), dtype=dtype, device=device), state
elif to_order == "col_ampere":
# blocks of 32 columns and 32 rows
cols = 32 * ((cols + 31) // 32)
rows = 32 * ((rows + 31) // 32)
return init_func((rows, cols), dtype=dtype, device=device), state
else:
raise NotImplementedError(f"To_order not supported: {to_order}")