def __init__()

in server/text_generation_server/layers/gptq/exllama.py [0:0]


    def __init__(self, weight: GPTQWeight, bias):
        """Set up a 4-bit quantized linear layer from pre-quantized weights.

        Stores the quantized tensors from ``weight``, discards a group index
        that is all zeros or equal to ``i // groupsize``, builds the extension
        handle with ``ext_make_q4`` and raises the module-level ``MAX_DQ`` /
        ``MAX_INNER`` maxima so that they cover this layer.

        Args:
            weight: Quantized weight bundle. This method reads its ``bits``,
                ``qweight``, ``qzeros``, ``scales``, ``g_idx`` and
                ``groupsize`` attributes. ``qweight`` must be on a CUDA device
                with an explicit index.
            bias: Optional bias, stored unchanged (may be ``None``).

        Raises:
            AssertionError: If ``weight.bits`` is not 4, if ``qweight`` is not
                on an indexed CUDA device, or if the group size inferred from
                ``qzeros`` differs from ``weight.groupsize``.
            ValueError: If a non-trivial group index is present but no group
                size could be inferred.
        """
        super().__init__()
        global MAX_DQ, MAX_INNER, ACT_ORDER, DEVICE
        assert weight.bits == 4

        self.device = weight.qweight.device
        self.qweight = weight.qweight
        self.qzeros = weight.qzeros
        self.scales = weight.scales
        # Host copy of the group index; reused by the triviality check below so
        # the device-to-host transfer happens only once.
        self.g_idx = weight.g_idx.cpu() if weight.g_idx is not None else None
        self.bias = bias
        # Defined on every path so the attribute can always be read; set to
        # True only when a trivial group index is dropped.
        self.empty_g_idx = False

        # A group index that is all zeros, or that simply maps row i to group
        # i // groupsize, is treated as absent.
        if self.g_idx is not None and (
            (self.g_idx == 0).all()
            or torch.equal(
                self.g_idx,
                torch.tensor(
                    [i // weight.groupsize for i in range(self.g_idx.shape[0])],
                    dtype=torch.int32,
                ),
            )
        ):
            self.empty_g_idx = True
            self.g_idx = None

        assert self.device.type == "cuda"
        assert self.device.index is not None

        self.q4 = ext_make_q4(
            self.qweight, self.qzeros, self.scales, self.g_idx, self.device.index
        )

        # NOTE(review): the factor 8 presumably reflects eight 4-bit values
        # packed per qweight element -- confirm against the packing format.
        self.height = self.qweight.shape[0] * 8
        self.width = self.qweight.shape[1]

        # Infer groupsize from height of qzeros
        self.groupsize = None
        if self.qzeros.shape[0] > 1:
            self.groupsize = self.height // self.qzeros.shape[0]

        if self.groupsize is not None:
            assert weight.groupsize == self.groupsize

        # Handle act-order matrix
        if self.g_idx is not None:
            if self.groupsize is None:
                raise ValueError("Found group index but no groupsize. What do?")
            self.act_order = True
        else:
            self.act_order = False

        # Module-level bookkeeping shared by every instance: remember the
        # device and keep running maxima over all layers built so far.
        DEVICE = self.qweight.device

        MAX_DQ = max(MAX_DQ, self.qweight.numel() * 8)

        if self.act_order:
            MAX_INNER = max(MAX_INNER, self.height, self.width)

            ACT_ORDER = True