def unpack()

in optimum/quanto/tensor/weights/tinygemm/packed.py


    def unpack(self):
        """Unpack the packed tensor to a torch.Tensor

        Packing is device-specific and implemented in undocumented dedicated kernels
        that are kept in sync with the corresponding matrix multiplication operation.

        Instead of implementing dedicated unpacking code, we pass an identity matrix
        to the mm operation, along with identity scales and mid-point shifts, to produce the unpacked uint8 weights.

        Returns:
            An unpacked uint8 `torch.Tensor` expanded along the second dimension.
        """
        out_features, in_features = self.size()
        # We need to pass a group_size to the mm and format the scale and shift accordingly,
        # although it does not modify the calculation since we use identity scales and shifts.
        # We arbitrarily choose the smallest supported group_size to make sure it divides in_features.
        group_size = 32
        scale_and_shift_shape = (in_features // group_size, out_features, 2)
        # Initialize identity scale
        id_scale_and_shift = torch.ones(scale_and_shift_shape, dtype=torch.bfloat16, device=self.device)
        # Set the shift to the mid-point, i.e. 2 ** (bits - 1) = 8 for 4-bit weights
        id_scale_and_shift[:, :, 1] = 8

        identity = torch.eye(in_features, dtype=torch.bfloat16, device=self.device)
        if self._data.device.type == "cpu":
            unpacked_data = torch._weight_int4pack_mm_for_cpu(identity, self._data, group_size, id_scale_and_shift)
        else:
            unpacked_data = torch._weight_int4pack_mm(identity, self._data, group_size, id_scale_and_shift)

        return unpacked_data.t().to(torch.uint8)
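
To see why identity scales and a mid-point shift recover the raw values, here is a minimal, self-contained sketch. It emulates the per-group dequantization the int4 mm kernel presumably applies, dequant = (q - 2 ** (bits - 1)) * scale + shift; that convention, and the helper name emulated_int4_mm, are assumptions made for illustration, not taken from the kernel source.

    import torch

    # Emulated per-group dequantization + matmul (assumed convention:
    # dequant = (q - 2 ** (bits - 1)) * scale + shift, with bits = 4).
    def emulated_int4_mm(x, q, group_size, scale_and_shift):
        # scale_and_shift has shape (in_features // group_size, out_features, 2);
        # expand each per-group value along the input dimension.
        scale = scale_and_shift[:, :, 0].repeat_interleave(group_size, dim=0).t()
        shift = scale_and_shift[:, :, 1].repeat_interleave(group_size, dim=0).t()
        w = (q.to(torch.bfloat16) - 8) * scale + shift  # (out_features, in_features)
        return x @ w.t()

    out_features, in_features, group_size = 4, 64, 32
    q = torch.randint(0, 16, (out_features, in_features))  # raw uint4 values
    id_scale_and_shift = torch.ones((in_features // group_size, out_features, 2), dtype=torch.bfloat16)
    id_scale_and_shift[:, :, 1] = 8  # mid-point shift: (q - 8) * 1 + 8 == q
    identity = torch.eye(in_features, dtype=torch.bfloat16)
    unpacked = emulated_int4_mm(identity, q, group_size, id_scale_and_shift).t().to(torch.uint8)
    assert torch.equal(unpacked, q.to(torch.uint8))

Because scale = 1 and shift = 8 cancel the quantization affine exactly, and every value involved is a small integer that bfloat16 represents exactly, the identity mm returns the raw uint4 values unchanged.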