# Excerpt: output_handler() from code/inference.py

def output_handler(data, context):
    """Post-process TensorFlow Serving output before it is returned to the client.

    NOTE(review): this handler reads module-level state (`postpro`, `int2tag`,
    `split_idx`, `sentences`, `ids`) that is presumably populated per-request by
    the input handler — confirm this is safe under the serving container's
    concurrency model.

    Args:
        data (obj): the TensorFlow serving response
        context (Context): an object containing request and configuration details
    Returns:
        (bytes, string): data to return to client, response content type
    Raises:
        ValueError: if TensorFlow Serving responded with a non-200 status;
            the serving error body is propagated as the exception message.
    """
    logging.info("Entering output handler")

    # Surface serving-side failures to the caller instead of post-processing them.
    if data.status_code != 200:
        raise ValueError(data.content.decode('utf-8'))

    logging.info("reading predictions as data.content")
    response_content_type = context.accept_header
    pred = json.loads(data.content.decode('utf-8'))

    logging.info("postpro1: select argmax")
    # One class index per token: argmax over the last (class-score) axis.
    pred_max = [list(np.array(k).argmax(-1)) for k in pred['outputs']]

    logging.info("postpro2: numbers to tags")
    y_pred = postpro.y2label(pred_max, int2tag, mask=0)

    logging.info("postpro3: remapping splits to origin index")
    flat_y_pred, _ = postpro.map_split_preds_to_idx(y_pred, split_idx)

    logging.info("postpro4: output formatting to dicts")
    nerc_prop_list = [postpro.preds_to_dict_single(s, y)
                      for s, y in zip(sentences, flat_y_pred)]

    logging.info("postpro5: mapping back to id")
    # Re-attach each prediction to its original request id and sentence text.
    pred_dict_list = [
        {'id': ids[i],
         'sentence': ' '.join(sentences[i]),
         'nerc_properties': nerc_prop_list[i]}
        for i in range(len(ids))
    ]

    logging.info("postpro6: list of dicts to json")
    pred_json = json.dumps(pred_dict_list)

    return pred_json, response_content_type