in code/inference.py [0:0]
import json
import logging

# Assumed module-level dependencies, defined elsewhere in inference.py:
# senpre and berpre (sentence/BERT preprocessing helpers), tokenizer,
# tag2int, and MAX_SEQUENCE_LENGTH.


def input_handler(data, context):
    """Pre-process request input before it is sent to the TensorFlow Serving REST API.

    Args:
        data (obj): the request data, a stream holding a JSON payload
        context (Context): an object containing request and configuration details

    Returns:
        (str): a JSON string holding the request body in the TFS 'inputs' format
    """
    logging.info("Starting input handler")
    # Only JSON requests are supported; reject anything else early.
    if context.request_content_type != 'application/json':
        raise ValueError(
            "Unsupported content type: {}".format(context.request_content_type))

    # Pass through JSON (assumes it is correctly formed).
    d = json.load(data)

    # Split the input into words. ids and sentences are kept as module-level
    # globals so that output_handler can reassemble the response later.
    logging.info("splitting input into words")
    global ids
    global sentences
    ids = [line.get('id', '') for line in d]
    sentences = [line.get('sentence', '').split() for line in d]

    # Split sentences longer than MAX_SEQUENCE_LENGTH and remember which
    # pieces belong to which original sentence.
    logging.info("sentence preprocessing (split with max sequence length)")
    global split_sentences
    global split_idx
    split_sentences, split_idx = senpre.split_and_duplicate_index(
        sentences, MAX_SEQUENCE_LENGTH)

    # Create tag placeholders (-PAD- for unlabelled data).
    logging.info("creating tags placement (-PAD- for unlabelled data)")
    tags_placement = [['-PAD-'] * len(sentence) for sentence in split_sentences]

    logging.info("calling convert text to examples")
    bert_example = berpre.convert_text_to_examples(split_sentences, tags_placement)

    # The +2 accounts for the [CLS] and [SEP] special tokens added by BERT.
    logging.info("converting examples to BERT features")
    (input_ids, input_masks, segment_ids, _) = berpre.convert_examples_to_features(
        tokenizer, bert_example, tag2int, max_seq_length=MAX_SEQUENCE_LENGTH + 2)

    # Convert the BERT feature arrays into the JSON structure expected by
    # TensorFlow Serving's REST predict API.
    logging.info("converting BERT features to the TensorFlow Serving format")
    result = {'inputs': {
        "input_word_ids": input_ids.tolist(),
        "input_mask": input_masks.tolist(),
        "input_type_ids": segment_ids.tolist(),
    }}
    logging.info("returning input for the model in TFS format")
    return json.dumps(result)
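
For reference, a minimal sketch of how the handler could be exercised locally, assuming inference.py is importable with its module-level tokenizer, tag2int, and helper modules already initialised; the payload values and the Context stand-in below are illustrative, not part of the real serving stack:

# --- illustrative usage sketch (not part of inference.py) ---
import io
import json
from types import SimpleNamespace

import inference  # assumes code/inference.py is on the path and initialised

# Payload shaped the way the handler expects: a list of objects carrying
# 'id' and 'sentence' keys. Values here are purely illustrative.
payload = json.dumps([
    {"id": "doc-1", "sentence": "John lives in New York"},
    {"id": "doc-2", "sentence": "Jane joined Acme Corp in March"},
]).encode("utf-8")

# Minimal stand-in for the SageMaker Context object; only the attribute
# the handler actually inspects is provided.
ctx = SimpleNamespace(request_content_type="application/json")

# The handler reads the payload as a stream, so wrap the bytes in BytesIO.
body = inference.input_handler(io.BytesIO(payload), ctx)
print(json.loads(body)["inputs"].keys())
# dict_keys(['input_word_ids', 'input_mask', 'input_type_ids'])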