def unpack_v2()

in optimum/quanto/tensor/weights/awq/packed.py


import numpy as np
import torch


def unpack_v2(packed):
    """Unpack a packed int16 tensor to a larger uint8 tensor

    Applies pack operations in reverse order (see pack_v2 method for details).
    Warning: very slow, to be used for debug only.

    Args:
        packed (`torch.Tensor`):
            The packed `torch.int16` tensor

    Returns:
        An unpacked uint8 `torch.Tensor` expanded along the first dimension.
    """
    assert packed.device.type in ["cuda", "xpu"]
    assert packed.ndim == 2
    I = 4  # interleave factor: four 4-bit values are packed into each int16
    S = 64  # stride along K used by the interleaved layout
    N_div_I, K = packed.shape
    N = N_div_I * I
    # Reshape (N // I, K) -> (N // I, K // S, S, 1)
    unpacked = packed.reshape(N // I, K // S, S, 1)
    # Convert to uint16 (through numpy because not supported by pytorch)
    unpacked = unpacked.cpu().numpy().astype(np.uint16)
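    # This CPU round-trip (plus the four masked copies below) is what makes the function debug-only slow.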
    # Unpack each int16 into four 4-bit values: (N // I, K // S, S, 1) -> (N // I, K // S, S, I)
    unpacked = torch.cat(
        [
            torch.tensor((unpacked & 0xF).astype(np.uint8)).to(packed.device),
            torch.tensor(((unpacked & 0xF0) >> 4).astype(np.uint8)).to(packed.device),
            torch.tensor(((unpacked & 0xF00) >> 8).astype(np.uint8)).to(packed.device),
            torch.tensor(((unpacked & 0xF000) >> 12).astype(np.uint8)).to(packed.device),
        ],
        axis=-1,
    )
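    # e.g. the 16-bit word 0x4321 unpacks to the four 4-bit values [1, 2, 3, 4]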
    # Reshape (N // I, K // S, S, I) -> (N // I, K // S, I, S)
    unpacked = unpacked.reshape(N // I, K // S, I, S)
    # Transpose (N // I, K // S, I, S) -> (N // I, I, K // S, S)
    unpacked = unpacked.permute(0, 2, 1, 3)
    # Deinterleave (N // I, I, K // S, S) -> (N, K)
    unpacked = unpacked.reshape(N, K)

    # Final steps to reverse the reordering applied at pack time (see the packing code for an explanation)
    unpacked = unpacked.reshape(N, K // 32, 4, 2, 4).permute(0, 1, 2, 4, 3)
    unpacked = unpacked.permute(0, 1, 3, 2, 4)
    unpacked = unpacked.reshape(N, K)

    return unpacked
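
For reference, a minimal round-trip sketch. It assumes that pack_v2 (referenced in the docstring) is importable from the same module, that it maps an (N, K) uint8 tensor to the (N // 4, K) int16 layout expected here, and that a CUDA (or XPU) device is available; the shapes are chosen to be divisible by the interleave factor (4) and the stride (64).

import torch

from optimum.quanto.tensor.weights.awq.packed import pack_v2, unpack_v2

# 4-bit values stored one per uint8 element.
weights = torch.randint(0, 16, (128, 128), dtype=torch.uint8, device="cuda")

packed = pack_v2(weights)  # assumed: (128, 128) uint8 -> (32, 128) int16
restored = unpack_v2(packed)  # debug-only inverse, back to (128, 128) uint8

assert torch.equal(restored, weights)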