in optimum/quanto/nn/qmodule.py [0:0]
def qweight(self):
    """Return the module's quantized weight.

    If the module does not quantize its weight parameter, returns None.
    If the module is frozen (weight is already a QTensor), returns the
    weight as-is. Otherwise, quantizes the weight dynamically so that the
    quantization op is part of the graph and gradients can propagate back
    to the underlying float weight values.
    """
    if self.weight_qtype is None:
        # This QModule does not quantize its weights at all.
        return None
    if isinstance(self.weight, QTensor):
        # Frozen QModule: the weight is already quantized.
        return self.weight
    # Dynamic per-axis quantization: ask the optimizer for the scale
    # (and, for non-symmetric optimizers, the shift), then quantize.
    if isinstance(self.optimizer, SymmetricOptimizer):
        weight_scale = self.optimizer(self.weight, qtype=self.weight_qtype, axis=0)
        weight_shift = None
    else:
        weight_scale, weight_shift = self.optimizer(
            self.weight,
            qtype=self.weight_qtype,
            axis=0,
            group_size=self.weight_group_size,
        )
    return quantize_weight(
        self.weight,
        qtype=self.weight_qtype,
        axis=0,
        scale=weight_scale,
        shift=weight_shift,
        group_size=self.weight_group_size,
        activation_qtype=self.activation_qtype,
    )