in diffq/base.py [0:0]
def get_quantized_state(self, packed=True, torch_pack=False):
"""
Return a quantized representation fo the weights. If `packed` is True,
this will also perform bitpacking to ensure optimal store.
If `torck_pack` is true, the bitpacking from `torch_pack` will be used.
It is slower (except maybe on GPU), but is compatible with torchscript.
You can restore a model from a quantized state either using
`BaseQuantizer.restore_quantized_state` or `diffq.restore_quantized_state`
if you do not have the original quantizer around anymore.
"""
    # Parameters that are stored in float16 rather than being quantized.
    float16_params = []
    for p in self._float16:
        q = p.data.half()
        float16_params.append(q)

    # Choose the bit-packing implementation (see docstring).
    if torch_pack:
        pack_fn = torch_pack_mod.pack
    else:
        pack_fn = bitpack.pack

    all_quantized = []
    for qparam in self._qparams:
        # Parameters tied to another qparam are handled with that one.
        if qparam.other is not None:
            continue
        quantized = self._quantize_param(qparam)
        if packed:
            quantized = self._bit_pack_param(qparam, quantized, pack_fn=pack_fn)
        all_quantized.append(quantized)

    state = {
        "quantized": all_quantized,
        "float16": float16_params,
        "others": [p.data.clone() for p in self._others],
    }

    # Metadata needed to rebuild the quantizer when restoring this state.
    kwargs = dict(self._init_kwargs)
    kwargs.pop("model")
    state["meta"] = {
        "init_kwargs": kwargs,
        "klass": self.__class__,
        "packed": packed,
        "torch_pack": torch_pack
    }
    return state
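
A minimal usage sketch, not part of diffq/base.py: it assumes `DiffQuantizer` as the concrete `BaseQuantizer` subclass and the top-level `diffq.restore_quantized_state` mentioned in the docstring; the module and file name below are placeholders for illustration.

# Usage sketch (assumption: DiffQuantizer wraps the model; file name is a placeholder).
import torch
import diffq

model = torch.nn.Linear(16, 16)
quantizer = diffq.DiffQuantizer(model)
# ... training with the quantizer's size penalty would normally happen here ...

state = quantizer.get_quantized_state(packed=True)
torch.save(state, "quantized.th")

# Restoring later, without the original quantizer instance:
fresh = torch.nn.Linear(16, 16)
diffq.restore_quantized_state(fresh, torch.load("quantized.th"))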