in simuleval/scorer/scorer.py [0:0]
def __init__(self, args):
    """Set up the scorer from an options object.

    Reads the source and target files named by ``args.source`` and
    ``args.target`` through ``self.load_text_file``, copies the evaluation
    options onto the instance, picks the instance class that matches the
    data type, and finally calls ``self.reset()``.

    Args:
        args: Object exposing the ``source``, ``target``, ``data_type``,
            ``eval_latency_unit``, ``sacrebleu_tokenizer`` and ``no_space``
            attributes.

    Raises:
        SystemExit: With status 1 when the data type is ``"speech"`` and
            the latency unit is ``"char"``, or when the data type is
            neither ``"text"`` nor ``"speech"``.
    """
    # Source is loaded before target; both are stored under fixed keys.
    source_lines = self.load_text_file(args.source)
    target_lines = self.load_text_file(args.target)
    self.data = {"src": source_lines, "tgt": target_lines}

    # Mirror the evaluation options onto the instance.
    data_type = args.data_type
    self.data_type = data_type
    self.eval_latency_unit = args.eval_latency_unit
    self.sacrebleu_tokenizer = args.sacrebleu_tokenizer
    self.no_space = args.no_space

    # Character-level latency is rejected for speech input.
    char_latency_on_speech = (
        data_type == "speech" and self.eval_latency_unit == "char"
    )
    if char_latency_on_speech:
        logger.error(
            "Character level latency for speech-to-text model is not supported at the moment. "
            "We will update this feature very soon."
        )
        sys.exit(1)

    # Report the configuration; len(self) relies on the enclosing class's
    # __len__, which is defined outside this block.
    logger.info(f"Evaluating on {data_type}")
    source_path = os.path.abspath(args.source)
    logger.info(f"Source: {source_path}")
    target_path = os.path.abspath(args.target)
    logger.info(f"Target: {target_path}")
    sentence_count = len(self)
    logger.info(f"Number of sentences: {sentence_count}")

    self.instances = {}

    # Choose the instance class for the data type; anything else is fatal.
    if data_type == "text":
        self.instance_class = TextInstance
    elif data_type == "speech":
        self.instance_class = AudioInstance
    else:
        problem = (
            "Please specify the data type (text or speech).\n"
            if data_type is None
            else f"{data_type} is not supported, "
            "please choose from text or speech.\n"
        )
        logger.error(problem)
        sys.exit(1)

    self.reset()