def handler()

in object-detection/code/inference.py [0:0]


def handler(data, context):
    """Turn an image request into a model input, invoke TFS, and return the result.

    Args:
        data: Request body stream. Only ``read()`` is used; its result is
            treated as the encoded image bytes.
        context: Request context. ``request_content_type``, ``rest_uri`` and
            ``accept_header`` are read from it, and it is passed on to
            ``_predict_using_grpc`` when ``USE_GRPC`` is truthy.

    Returns:
        A ``(prediction, response_content_type)`` tuple. On the REST path
        ``prediction`` is the raw response body from TFS; on the gRPC path it
        is whatever ``_predict_using_grpc`` returns. ``response_content_type``
        is ``context.accept_header``.

    Raises:
        Exception: If the TFS REST endpoint replies with a non-200 status;
            the message is the response body.
    """
    global num_inferences
    num_inferences += 1

    print(f'\n************ inference #: {num_inferences}')
    if context.request_content_type == 'application/x-image':
        img = Image.open(io.BytesIO(data.read())).convert('RGB')
        img = img.resize((WIDTH, HEIGHT))
        img_array = image.img_to_array(img)
        # "channels_last" layout, cast to uint8; no additional preprocessing.
        img_array = img_array.reshape((HEIGHT, WIDTH, 3)).astype(np.uint8)
        # Add a leading axis of size 1 in front of the image dimensions.
        instance = np.expand_dims(img_array, axis=0)
        print(f'    final image shape: {instance.shape}')
        print(f'    final image size: {instance.nbytes}')
        # The decoded image object is no longer needed once the array exists.
        del img
    else:
        # NOTE(review): presumably _return_error raises; if it ever returns,
        # `instance` is unbound below -- confirm against its definition.
        _return_error(415, 'Unsupported content type "{}"'.format(context.request_content_type or 'Unknown'))

    start_time = time.time()

    if USE_GRPC:
        prediction = _predict_using_grpc(context, instance)

    else:  # use TFS REST API
        inst_json = json.dumps({'instances': instance.tolist()})
        print('rest call')
        response = requests.post(context.rest_uri, data=inst_json)
        if response.status_code != 200:
            # Decode leniently so a non-UTF-8 error body cannot mask the
            # server error with a UnicodeDecodeError.
            raise Exception(response.content.decode('utf-8', errors='replace'))
        res = response.content
        # len() gives the actual payload length; sys.getsizeof would also
        # count the Python object header. json.dumps emits ASCII by default,
        # so the character count of inst_json equals its byte count.
        request_size = len(inst_json)
        response_size = len(res)
        print('request payload size')
        print(request_size)
        print('response payload size')
        print(response_size)
        # Extracting only the detection boxes here would slow inference
        # further, so the raw TFS response is returned and postprocessing is
        # left to the client.
        prediction = res
    end_time = time.time()
    latency = int((end_time - start_time) * 1000)
    print(f'=== TFS invoke took: {latency} ms')

    print('complete')
    print(context.accept_header)
    response_content_type = context.accept_header
    return prediction, response_content_type