in optimum/quanto/models/transformers_models.py
def _save_pretrained(self, save_directory: Path) -> None:
    model = self._wrapped
    if getattr(model.config, "tie_word_embeddings", True):
        # The original model had tied input and output embeddings
        if isinstance(model.get_input_embeddings(), QModuleMixin) or isinstance(
            model.get_output_embeddings(), QModuleMixin
        ):
            # At least one of the two is quantized, so they cannot be tied anymore
            model.config.tie_word_embeddings = False
    self._wrapped.save_pretrained(save_directory, safe_serialization=True)
    # Save the quantization map alongside the weights so the model can be reloaded
    qmap_name = os.path.join(save_directory, self._qmap_name())
    qmap = quantization_map(self._wrapped)
    with open(qmap_name, "w", encoding="utf8") as f:
        json.dump(qmap, f, indent=4)
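For context, a minimal sketch of how a checkpoint written this way can be reloaded. It assumes the directory was produced by the method above, that `_qmap_name()` resolves to `quantization_map.json`, and that the serialized weights live in `model.safetensors`; the directory name `quantized-model` is hypothetical. It uses the public `requantize` helper from `optimum.quanto` on a `transformers` model rebuilt from its config:

```python
import json
import os

import torch
from safetensors.torch import load_file
from transformers import AutoConfig, AutoModelForCausalLM

from optimum.quanto import requantize

save_directory = "quantized-model"  # hypothetical path written by _save_pretrained

# Load the serialized weights and the quantization map saved next to them
# (quantization_map.json is assumed to be what _qmap_name() returns).
state_dict = load_file(os.path.join(save_directory, "model.safetensors"))
with open(os.path.join(save_directory, "quantization_map.json"), encoding="utf8") as f:
    qmap = json.load(f)

# Rebuild the model skeleton on the meta device (no memory is allocated),
# then requantize it in place from the state dict and the quantization map.
config = AutoConfig.from_pretrained(save_directory)
with torch.device("meta"):
    model = AutoModelForCausalLM.from_config(config)
requantize(model, state_dict, qmap, device=torch.device("cpu"))
model.eval()
```

This mirrors the reload recipe documented for optimum-quanto: constructing the model on the meta device avoids materializing the full-precision weights, since `requantize` replaces them with their quantized counterparts anyway.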