in scripts/setfit/distillation_baseline.py [0:0]
def standard_model_distillation(self, train_raw_student, x_test, y_test, num_classes):
    """Train a plain transformer student on teacher-labeled data (baseline distillation).

    The student is fine-tuned as a regressor against soft "score" targets via
    ``RegressionTrainer`` and then scored on the test set.

    Args:
        train_raw_student: dataset with "text"/"score" columns produced by the teacher.
        x_test: list of raw test texts.
        y_test: list of integer class labels for ``x_test``.
        num_classes: number of classes; also the width of the one-hot score vectors.

    Returns:
        dict mapping ``self.metric_name`` to the evaluation score on the test set.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # One-hot encode each integer label so test targets match the teacher's
    # soft-score format (dict comprehension instead of a manual loop + update).
    value2hot = {
        i: [1 if j == i else 0 for j in range(num_classes)]
        for i in range(num_classes)
    }
    test_dict = {"text": x_test, "score": [value2hot[i] for i in y_test]}
    raw_test_ds = Dataset.from_dict(test_dict)
    # validation and test sets are the same
    ds = {
        "train": train_raw_student,
        "validation": raw_test_ds,
        "test": raw_test_ds,
    }
    # Tokenize every split; raw "text"/"score" columns are consumed by the
    # preprocessor and dropped.
    for split in ds:
        ds[split] = ds[split].map(self.bl_student_preprocess, remove_columns=["text", "score"])
    training_args = TrainingArguments(
        output_dir="baseline_distil_model",
        learning_rate=self.learning_rate,
        per_device_train_batch_size=self.batch_size,
        per_device_eval_batch_size=self.batch_size,
        num_train_epochs=self.num_epochs,
        eval_strategy="no",
        save_strategy="no",
        load_best_model_at_end=False,
        weight_decay=0.01,
        push_to_hub=False,
    )
    # Pad dynamically per batch instead of to a fixed global length.
    data_collator = DataCollatorWithPadding(tokenizer=self.tokenizer)
    # Fresh student with a classification head sized to num_classes.
    student_model = AutoModelForSequenceClassification.from_pretrained(
        self.student_model_name, num_labels=num_classes
    ).to(device)
    trainer = RegressionTrainer(
        student_model,
        args=training_args,
        train_dataset=ds["train"],
        eval_dataset=ds["validation"],
        data_collator=data_collator,
        tokenizer=self.tokenizer,
        compute_metrics=self.compute_metrics_for_regression,
    )
    trainer.train()
    # Re-point the trainer at the test split for the final evaluation
    # (identical to validation here, per the comment above).
    trainer.eval_dataset = ds["test"]
    score = trainer.evaluate()[f"eval_{self.metric_name}"]
    return {self.metric_name: score}