def handle_request()

in templates/inference-endpoints/preprocessing/1/model.py [0:0]

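Preprocessing entry point of the Triton Python backend model: it reads the raw QUERY string from the request, tokenizes it, and returns an InferenceResponse carrying the token ids (INPUT_ID), their length (REQUEST_INPUT_LEN), empty bad-word and stop-word lists, and the pass-through REQUEST_OUTPUT_LEN, presumably for the downstream generation step of the ensemble.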

    def handle_request(self, request: Sequence):
        # Get input tensors
        query_tensor = pb_utils.get_input_tensor_by_name(request, 'QUERY')
        query = query_tensor.as_numpy().item().decode("utf-8")
        request_output_len = pb_utils.get_input_tensor_by_name(request, 'REQUEST_OUTPUT_LEN')
        # bad_words_dict = pb_utils.get_input_tensor_by_name(request, 'BAD_WORDS_DICT').as_numpy().item()
        # stop_words_dict = pb_utils.get_input_tensor_by_name(request, 'STOP_WORDS_DICT').as_numpy().item()

        # Preprocess the input data: tokenize the raw query string.
        # input_id, request_input_len = self._create_request(query)
        encoding = self.tokenizer.encode(query)

        # bad_words = self._to_word_list_format(bad_words_dict)
        # stop_words = self._to_word_list_format(stop_words_dict)

        # Create output tensors. You need pb_utils.Tensor
        # objects to create pb_utils.InferenceResponse.
        # Bad-word and stop-word lists are left empty (shape (2, 0)).
        bad_words_ids = pb_utils.Tensor('BAD_WORDS_IDS', np.array([[], []], dtype=self.bad_words_ids_dtype))
        stop_words_ids = pb_utils.Tensor('STOP_WORDS_IDS', np.array([[], []], dtype=self.stop_words_ids_dtype))

        # Token ids and their length; both carry a leading batch dimension of 1.
        input_ids = pb_utils.Tensor('INPUT_ID', np.array([encoding.ids], dtype=self.input_id_dtype))
        request_input_len = pb_utils.Tensor(
            'REQUEST_INPUT_LEN', np.array([[len(encoding.ids)]], dtype=self.request_input_len_dtype)
        )

        # Create the InferenceResponse. You can attach an error in case
        # there was a problem handling this inference request, for example:
        #
        # pb_utils.InferenceResponse(
        #     output_tensors=..., error=pb_utils.TritonError("An error occurred"))
        return pb_utils.InferenceResponse(output_tensors=[
            input_ids, bad_words_ids, stop_words_ids, request_input_len, request_output_len
        ])
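
How this method plugs into the rest of the model is not shown in the excerpt above. The sketch below is one plausible wiring under the standard Triton Python backend contract: initialize resolves the numpy dtype for each declared output and loads the tokenizer, and execute produces one InferenceResponse per incoming request by delegating to handle_request. The tokenizer backend, file path, and dtype lookups are assumptions for illustration only, not the template's actual implementation.

import json

import numpy as np
import triton_python_backend_utils as pb_utils
from tokenizers import Tokenizer  # assumed tokenizer backend; the template may use another


class TritonPythonModel:
    def initialize(self, args):
        model_config = json.loads(args['model_config'])

        # Resolve the numpy dtype declared for each output in config.pbtxt.
        def output_dtype(name):
            cfg = pb_utils.get_output_config_by_name(model_config, name)
            return pb_utils.triton_string_to_numpy(cfg['data_type'])

        self.input_id_dtype = output_dtype('INPUT_ID')
        self.request_input_len_dtype = output_dtype('REQUEST_INPUT_LEN')
        self.bad_words_ids_dtype = output_dtype('BAD_WORDS_IDS')
        self.stop_words_ids_dtype = output_dtype('STOP_WORDS_IDS')

        # Hypothetical tokenizer location; the real template decides where it lives.
        self.tokenizer = Tokenizer.from_file('/repository/tokenizer.json')

    def execute(self, requests):
        # The Python backend hands over a batch of requests; return one response per request.
        return [self.handle_request(request) for request in requests]

    def handle_request(self, request):
        ...  # the implementation shown above

Keeping the per-request logic in handle_request leaves execute trivial and makes the preprocessing step easy to exercise in isolation with a single request object.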