in optimum/furiosa/modeling.py [0:0]
def evaluation_loop(self, dataset: Dataset):
    """
    Run evaluation and return metrics and predictions.

    Args:
        dataset (`datasets.Dataset`):
            Dataset to use for the evaluation step.
    """
    logger.info("***** Running evaluation *****")
    from transformers import EvalPrediction
    from transformers.trainer_pt_utils import nested_concat
    from transformers.trainer_utils import EvalLoopOutput
    all_preds = None
    all_labels = None
    for step, inputs in tqdm.tqdm(enumerate(dataset), total=len(dataset)):
        has_labels = all(inputs.get(k) is not None for k in self.label_names)
        if has_labels:
            # Wrap each label in a batch dimension; unwrap the tuple when there is a single label column.
            labels = tuple(np.array([inputs.get(name)]) for name in self.label_names)
            if len(labels) == 1:
                labels = labels[0]
        else:
            labels = None
        # Build batched numpy inputs in the order and dtypes expected by the Furiosa session.
        inputs = [
            np.array([inputs[key]], dtype=FURIOSA_DTYPE_TO_NUMPY_DTYPE[self.inputs_to_dtype[k]])
            for k, key in enumerate(self.input_names)
            if key in inputs
        ]
        preds = self.sess.run(inputs)
        if len(preds) == 1:
            preds = preds[0].numpy()
        # Accumulate predictions and labels across the whole dataset.
        all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100)
        all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
    if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
        metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
    else:
        metrics = {}
    return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset))
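
A minimal usage sketch of the loop above. The checkpoint, dataset, shape dicts, and compute_metrics wiring are illustrative assumptions (the metric values they produce are not meaningful); only the evaluation_loop call and the compute_metrics attribute come from the excerpt.

import evaluate
import numpy as np
from datasets import load_dataset
from transformers import AutoImageProcessor

from optimum.furiosa import FuriosaAIModelForImageClassification

model_id = "microsoft/resnet-50"  # placeholder checkpoint for illustration
# export=True exports to ONNX and compiles for the Furiosa NPU; the static
# input/output shapes are assumptions required by the compiler.
model = FuriosaAIModelForImageClassification.from_pretrained(
    model_id,
    export=True,
    input_shape_dict={"pixel_values": [1, 3, 224, 224]},
    output_shape_dict={"logits": [1, 1000]},
)
processor = AutoImageProcessor.from_pretrained(model_id)

# Placeholder evaluation set: any datasets.Dataset works as long as its columns
# match model.input_names (here assumed "pixel_values") and model.label_names.
eval_dataset = load_dataset("beans", split="validation").map(
    lambda ex: processor(ex["image"]), batched=True
)

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    # eval_pred is a transformers.EvalPrediction carrying predictions and label_ids.
    return accuracy.compute(
        predictions=np.argmax(eval_pred.predictions, axis=-1),
        references=eval_pred.label_ids,
    )

model.compute_metrics = compute_metrics
outputs = model.evaluation_loop(eval_dataset)
print(outputs.metrics)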