src/mlm/scorers.py [192:200]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            ids_original = np.array(self._tokenizer.convert_tokens_to_ids(tokens_original))

            # Enforce max length
            if len(ids_original) > self._max_length:
                logging.error("Line #{} is too long; will output score of 0 and omit in token counts (but not yet in word counts!)".format(sent_idx+1))
            else:
                sents_expanded += [(sent_idx, ids_original, len(ids_original))]

        return SimpleDataset(sents_expanded)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


src/mlm/scorers.py [1008:1016]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            ids_original = np.array(self._tokenizer.convert_tokens_to_ids(tokens_original))

            # Enforce max length
            if len(ids_original) > self._max_length:
                logging.error("Line #{} is too long; will output score of 0 and omit in token counts (but not yet in word counts!)".format(sent_idx+1))
            else:
                sents_expanded += [(sent_idx, ids_original, len(ids_original))]

        return SimpleDataset(sents_expanded)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -