optimum_benchmark/preprocessors/dataset_preprocessor.py [13:29]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    dataset: Dataset,
    pretrained_processor: PretrainedProcessor,
    scenario_config: EnergyStarConfig,
    pretrained_config: PretrainedConfig,
) -> Dataset:
    if scenario_config.input_shapes["batch_size"] == 1:
        # Remove empty samples when batch_size is 1 because empty inputs will make the model fail
        dataset = dataset.filter(lambda example: example[scenario_config.text_column_name] != "")

    if scenario_config.num_samples != -1:
        dataset = dataset.select(range(scenario_config.num_samples))

    if getattr(pretrained_processor, "pad_token", None) is None:
        # Tokenizers without a pad token (e.g. GPT-style models) fall back to the EOS token
        pretrained_processor.pad_token = pretrained_processor.eos_token

    # Pad only when batching; cap sequence length at the model's context window (default 512)
    padding = scenario_config.input_shapes["batch_size"] != 1
    max_length = getattr(pretrained_config, "max_position_embeddings", 512)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
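
The excerpt stops at line 29, right after padding and max_length are computed but before either is used. A minimal sketch of the tokenization step that presumably follows, assuming a standard datasets map over the processor (the tokenize helper and the map call are illustrative, not taken from the file):

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def tokenize(examples):
        # Illustrative continuation: feed the flags computed above into the processor
        return pretrained_processor(
            examples[scenario_config.text_column_name],
            padding=padding,        # pad across the batch only when batch_size != 1
            truncation=True,
            max_length=max_length,  # never exceed the model's context window
        )

    dataset = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -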



optimum_benchmark/preprocessors/dataset_preprocessor.py [267:283]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    dataset: Dataset,
    pretrained_processor: PretrainedProcessor,
    scenario_config: EnergyStarConfig,
    pretrained_config: PretrainedConfig,
) -> Dataset:
    if scenario_config.input_shapes["batch_size"] == 1:
        # Remove empty samples when batch_size is 1 because empty inputs will make the model fail
        dataset = dataset.filter(lambda example: example[scenario_config.text_column_name] != "")

    if scenario_config.num_samples != -1:
        dataset = dataset.select(range(scenario_config.num_samples))

    if getattr(pretrained_processor, "pad_token", None) is None:
        # Tokenizers without a pad token (e.g. GPT-style models) fall back to the EOS token
        pretrained_processor.pad_token = pretrained_processor.eos_token

    # Pad only when batching; cap sequence length at the model's context window (default 512)
    padding = scenario_config.input_shapes["batch_size"] != 1
    max_length = getattr(pretrained_config, "max_position_embeddings", 512)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
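
The spans [13:29] and [267:283] are identical. A minimal sketch of hoisting the shared preamble into a single helper, so each preprocessing function reduces to one call; the name prepare_dataset and the string-annotated parameter types are assumptions, not from the repository:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
from typing import Tuple

from datasets import Dataset


def prepare_dataset(
    dataset: Dataset,
    pretrained_processor: "PretrainedProcessor",
    scenario_config: "EnergyStarConfig",
    pretrained_config: "PretrainedConfig",
) -> Tuple[Dataset, bool, int]:
    # Hypothetical helper collecting the preamble duplicated above:
    # empty-sample filtering, sampling, pad-token fallback, padding/max_length
    if scenario_config.input_shapes["batch_size"] == 1:
        # Remove empty samples when batch_size is 1 because empty inputs will make the model fail
        dataset = dataset.filter(lambda example: example[scenario_config.text_column_name] != "")

    if scenario_config.num_samples != -1:
        dataset = dataset.select(range(scenario_config.num_samples))

    if getattr(pretrained_processor, "pad_token", None) is None:
        pretrained_processor.pad_token = pretrained_processor.eos_token

    padding = scenario_config.input_shapes["batch_size"] != 1
    max_length = getattr(pretrained_config, "max_position_embeddings", 512)
    return dataset, padding, max_length
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Each duplicated call site would then collapse to: dataset, padding, max_length = prepare_dataset(dataset, pretrained_processor, scenario_config, pretrained_config).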



