optimum/graphcore/pipelines/fill_mask.py (23 lines of code) (raw):

# Copyright 2021 The HuggingFace Team. All rights reserved. # Copyright (c) 2022 Graphcore Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from typing import Dict from transformers import FillMaskPipeline from transformers.pipelines.base import GenericTensor, PipelineException class IPUFillMaskPipeline(FillMaskPipeline): def _sanitize_parameters(self, top_k=None, targets=None, **tokenizer_kwargs): preprocess_params = tokenizer_kwargs postprocess_params = {} if targets is not None: target_ids = self.get_target_ids(targets, top_k) postprocess_params["target_ids"] = target_ids if top_k is not None: postprocess_params["top_k"] = top_k if self.tokenizer.mask_token_id is None: raise PipelineException( "fill-mask", self.model.base_model_prefix, "The tokenizer does not define a `mask_token`." ) return preprocess_params, {}, postprocess_params def preprocess(self, inputs, return_tensors=None, **tokenizer_kwargs) -> Dict[str, GenericTensor]: if return_tensors is None: return_tensors = self.framework model_inputs = self.tokenizer(inputs, return_tensors=return_tensors, **tokenizer_kwargs) self.ensure_exactly_one_mask_token(model_inputs) return model_inputs