image-classification/code/inference.py [79:107]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Log the preprocessed input's shape/byte size so payload problems can
        # be diagnosed from the container logs.
        print(f'    final image shape: {instance.shape}')
        print(f'    final image size: {instance.nbytes}')
        # Release intermediate image objects to lower per-request peak memory.
        del x, img
    else:
        # NOTE(review): if _return_error returns normally instead of raising,
        # execution falls through to the code below with `instance` undefined
        # (NameError) -- confirm _return_error's behavior in the full file.
        _return_error(415, 'Unsupported content type "{}"'.format(context.request_content_type or 'Unknown'))

    # Time only the model-server invocation (gRPC or REST round trip).
    start_time = time.time()
    
    if USE_GRPC:
        prediction = _predict_using_grpc(context, instance)

    else: # use TFS REST API
        # Serialize the input as the TF Serving REST predict payload
        # ({"instances": [...]}) and POST it to the local TFS endpoint.
        inst_json = json.dumps({'instances': instance.tolist()})
        print('rest call')
        # NOTE(review): no timeout is passed to requests.post; a hung TFS
        # endpoint would block this handler indefinitely.
        response = requests.post(context.rest_uri, data=inst_json)
        if response.status_code != 200:
            # Surface the TFS error body to the caller.
            raise Exception(response.content.decode('utf-8'))
        res = response.content
        # sys.getsizeof is shallow, but adequate here since the payloads are a
        # plain str (request) and bytes (response).
        request_size = sys.getsizeof(inst_json)
        response_size = sys.getsizeof(res)
        print('request payload size')
        print(request_size)
        print('response payload size')
        print(response_size)
        # Pass the raw TFS JSON response bytes through unmodified.
        prediction = res

    end_time   = time.time()
    latency    = int((end_time - start_time) * 1000)
    print(f'=== TFS invoke took: {latency} ms')
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



object-detection/code/inference.py [75:105]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Log the preprocessed input's shape/byte size so payload problems can
        # be diagnosed from the container logs.
        print(f'    final image shape: {instance.shape}')
        print(f'    final image size: {instance.nbytes}')
        # Release intermediate image objects to lower per-request peak memory.
        del x, img
    else:
        # NOTE(review): if _return_error returns normally instead of raising,
        # execution falls through to the code below with `instance` undefined
        # (NameError) -- confirm _return_error's behavior in the full file.
        _return_error(415, 'Unsupported content type "{}"'.format(context.request_content_type or 'Unknown'))

    # Time only the model-server invocation (gRPC or REST round trip).
    start_time = time.time()
    
    if USE_GRPC:
        prediction = _predict_using_grpc(context, instance)

    else: # use TFS REST API
        # Serialize the input as the TF Serving REST predict payload
        # ({"instances": [...]}) and POST it to the local TFS endpoint.
        inst_json = json.dumps({'instances': instance.tolist()})
        print('rest call')
        # NOTE(review): no timeout is passed to requests.post; a hung TFS
        # endpoint would block this handler indefinitely.
        response = requests.post(context.rest_uri, data=inst_json)
        if response.status_code != 200:
            # Surface the TFS error body to the caller.
            raise Exception(response.content.decode('utf-8'))
        res = response.content
        # sys.getsizeof is shallow, but adequate here since the payloads are a
        # plain str (request) and bytes (response).
        request_size = sys.getsizeof(inst_json)
        response_size = sys.getsizeof(res)
        print('request payload size')
        print(request_size)
        print('response payload size')
        print(response_size)
        # Deliberately skipped: extracting detection_boxes server-side slowed
        # inference, so postprocessing is left to the client.
        #below postprocessing slows down inference further so we will leave it to be done at client side
        #prediction_json = {'detection_boxes': json.loads(res)['predictions'][0]['detection_boxes']}
        #prediction = json.dumps(prediction_json)
        # Pass the raw TFS JSON response bytes through unmodified.
        prediction = res
    end_time   = time.time()
    latency    = int((end_time - start_time) * 1000)
    print(f'=== TFS invoke took: {latency} ms')
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



