in optimum/gptq/quantizer.py
def _replace_by_quant_layers(self, module: nn.Module, names: List[str], name: str = ""):
    """
    Replaces linear layers in `module` by `QuantLinear`

    Args:
        module (`nn.Module`):
            Module to quantize
        names (`List[str]`):
            List of names of the modules to quantize
        name (`str`, defaults to `""`):
            To keep track of the name of the current module
    """
    # Already a quantized linear layer: nothing to replace.
    if isinstance(module, self.quant_linear):
        return
    for attr in dir(module):
        layer = getattr(module, attr)
        # Build the fully qualified (dotted) name of this attribute.
        name1 = name + "." + attr if name != "" else attr
        if name1 in names:
            device = get_device(layer)
            delattr(module, attr)
            # Read the layer shape; transformers' Conv1D (used by GPT-2-style
            # models) stores its weight transposed relative to nn.Linear.
            if isinstance(layer, nn.Linear):
                in_features = layer.in_features
                out_features = layer.out_features
            elif isinstance(layer, nn.Conv2d):
                in_features = layer.in_channels
                out_features = layer.out_channels
            elif isinstance(layer, Conv1D):
                in_features = layer.weight.shape[0]
                out_features = layer.weight.shape[1]
            bias = layer.bias is not None
            if is_gptqmodel_available():
                # gptqmodel's QuantLinear additionally takes desc_act and sym.
                new_layer = self.quant_linear(
                    self.bits,
                    self.group_size,
                    self.desc_act,
                    self.sym,
                    in_features,
                    out_features,
                    bias,
                    weight_dtype=layer.weight.dtype,
                )
            else:
                # auto-gptq path: use_cuda_fp16 is only passed when act-order
                # (desc_act) is off or quantization is per-column (group_size == -1).
                if not self.desc_act or self.group_size == -1:
                    new_layer = self.quant_linear(
                        self.bits,
                        self.group_size,
                        in_features,
                        out_features,
                        bias,
                        use_cuda_fp16=self.use_cuda_fp16,
                        weight_dtype=layer.weight.dtype,
                    )
                else:
                    new_layer = self.quant_linear(
                        self.bits,
                        self.group_size,
                        in_features,
                        out_features,
                        bias,
                        weight_dtype=layer.weight.dtype,
                    )
            new_layer.device = device
            setattr(module, attr, new_layer.to(device))
    # Recurse into child modules, carrying the dotted prefix along.
    for name1, child in module.named_children():
        self._replace_by_quant_layers(child, names, name + "." + name1 if name != "" else name1)
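As a minimal sketch of the dotted-name bookkeeping this method performs, the toy model below (TinyModel is an illustrative assumption, not part of optimum) shows which layers a `names` list such as ["block.0"] would select; PyTorch's named_modules yields the same fully qualified names that `name1` accumulates during the recursion:

import torch.nn as nn

# Illustrative toy model -- not part of optimum/gptq.
class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.block = nn.Sequential(nn.Linear(8, 8))  # child reachable as "block.0"
        self.head = nn.Linear(8, 2)                  # reachable as "head"

model = TinyModel()
names = ["block.0"]  # only this layer would be replaced

for full_name, layer in model.named_modules():
    if full_name in names:
        # _replace_by_quant_layers would swap this nn.Linear for a quant
        # layer, preserving in_features/out_features and the bias flag.
        print(f"would quantize: {full_name} "
              f"({layer.in_features} -> {layer.out_features}, bias={layer.bias is not None})")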