# handler() — excerpt from image-classification/code/inference.py


def handler(data, context):
    """Handle one SageMaker TFS inference request.

    Args:
        data: stream holding the raw request body (read once via ``data.read()``).
        context: serving context; provides ``request_content_type``,
            ``rest_uri``, ``accept_header`` (and gRPC details used by
            ``_predict_using_grpc``).

    Returns:
        Tuple of (prediction payload, response content type). The prediction is
        either the gRPC helper's result or the raw TFS REST response bytes.

    Raises:
        Exception: when the TFS REST endpoint responds with a non-200 status.
    """
    global num_inferences
    num_inferences += 1

    print(f'\n************ inference #: {num_inferences}')

    # Guard clause: only raw image payloads are supported. _return_error is
    # presumably expected to raise; if it ever returns a value instead, we
    # propagate it here rather than falling through — the original code
    # continued past this branch and would hit an UnboundLocalError on
    # `instance` below.
    if context.request_content_type != 'application/x-image':
        return _return_error(415, 'Unsupported content type "{}"'.format(
            context.request_content_type or 'Unknown'))

    # Decode, normalize to RGB, and resize to the model's expected input size.
    stream = io.BytesIO(data.read())
    img = Image.open(stream).convert('RGB')
    img = img.resize((WIDTH, HEIGHT))
    img_array = image.img_to_array(img)  # , data_format = "channels_first")
    # The image is now (224, 224, 3) or (3, 224, 224) depending on data_format;
    # add a leading batch dimension, e.g. (1, 224, 224, 3).
    x = np.expand_dims(img_array, axis=0)
    instance = preprocess_input(x)
    print(f'    final image shape: {instance.shape}')
    print(f'    final image size: {instance.nbytes}')
    # Drop intermediates promptly to keep peak memory down.
    del x, img

    start_time = time.time()

    if USE_GRPC:
        prediction = _predict_using_grpc(context, instance)
    else:  # use TFS REST API
        inst_json = json.dumps({'instances': instance.tolist()})
        print('rest call')
        response = requests.post(context.rest_uri, data=inst_json)
        if response.status_code != 200:
            raise Exception(response.content.decode('utf-8'))
        res = response.content
        # NOTE: sys.getsizeof is shallow — these are rough payload-size gauges.
        request_size = sys.getsizeof(inst_json)
        response_size = sys.getsizeof(res)
        print('request payload size')
        print(request_size)
        print('response payload size')
        print(response_size)
        prediction = res

    end_time = time.time()
    latency = int((end_time - start_time) * 1000)
    print(f'=== TFS invoke took: {latency} ms')

    response_content_type = context.accept_header
    return prediction, response_content_type