in src/setfit/trainer.py [0:0]
def evaluate(self, dataset: Optional[Dataset] = None, metric_key_prefix: str = "test") -> Dict[str, float]:
    """Compute the configured metric(s) for the trained classifier.

    Args:
        dataset (`Dataset`, *optional*):
            The dataset to compute the metrics on. If not provided, falls back to the
            evaluation dataset passed via the `eval_dataset` argument at `Trainer`
            initialization.
        metric_key_prefix (`str`, *optional*, defaults to `"test"`):
            Prefix prepended to each metric name when results are recorded on the
            model card (e.g. ``test_accuracy``). The returned dict keys are unprefixed.

    Returns:
        `Dict[str, float]`: The evaluation metrics.

    Raises:
        ValueError: If no evaluation dataset is available, or if `self.metric` is
            neither a string nor a callable.
    """
    if dataset is not None:
        self._validate_column_mapping(dataset)
        if self.column_mapping is not None:
            logger.info("Applying column mapping to the evaluation dataset")
            eval_dataset = self._apply_column_mapping(dataset, self.column_mapping)
        else:
            eval_dataset = dataset
    else:
        eval_dataset = self.eval_dataset

    if eval_dataset is None:
        raise ValueError("No evaluation dataset provided to `Trainer.evaluate` nor the `Trainer` initialization.")

    x_test = eval_dataset["text"]
    y_test = eval_dataset["label"]

    logger.info("***** Running evaluation *****")
    y_pred = self.model.predict(x_test, use_labels=False)
    if isinstance(y_pred, torch.Tensor):
        y_pred = y_pred.cpu()

    # String labels cannot be fed to the metric functions directly; fit one encoder
    # over references AND predictions so both map into the same integer label space.
    if y_test and isinstance(y_test[0], str):
        encoder = LabelEncoder()
        encoder.fit(list(y_test) + list(y_pred))
        y_test = encoder.transform(y_test)
        y_pred = encoder.transform(y_pred)

    metric_kwargs = self.metric_kwargs or {}
    if isinstance(self.metric, str):
        # Multi-target models need the "multilabel" configuration of the HF metric.
        metric_config = "multilabel" if self.model.multi_target_strategy is not None else None
        metric_fn = evaluate.load(self.metric, config_name=metric_config)
        results = metric_fn.compute(predictions=y_pred, references=y_test, **metric_kwargs)
    elif callable(self.metric):
        results = self.metric(y_pred, y_test, **metric_kwargs)
    else:
        raise ValueError("metric must be a string or a callable")

    # A callable metric may return a bare scalar; normalize to a dict for a uniform API.
    if not isinstance(results, dict):
        results = {"metric": results}
    self.model.model_card_data.post_training_eval_results(
        {f"{metric_key_prefix}_{key}": value for key, value in results.items()}
    )
    return results