in object-detection/code/inference.py [0:0]
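# --- Illustrative sketch (not part of the original file) --------------------
# When USE_GRPC is set, handler() below delegates to _predict_using_grpc(),
# which is defined elsewhere in this file and not shown in this excerpt. The
# function below is only a minimal sketch of what a TensorFlow Serving gRPC
# Predict call can look like; the model name 'model', signature
# 'serving_default', input key 'input_tensor', and port 8500 are assumptions,
# not values taken from this repository.
def _predict_using_grpc_sketch(instance, port=8500):
    import grpc
    from tensorflow import make_tensor_proto
    from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'model'                       # assumed model name
    request.model_spec.signature_name = 'serving_default'   # assumed signature
    request.inputs['input_tensor'].CopyFrom(make_tensor_proto(instance))

    channel = grpc.insecure_channel(f'localhost:{port}')
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    return stub.Predict(request, 30.0)  # 30-second timeout
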
def handler(data, context):
    """Deserialize the request, invoke TensorFlow Serving, and return the raw prediction."""
    global num_inferences
    num_inferences += 1
    print(f'\n************ inference #: {num_inferences}')
    if context.request_content_type == 'application/x-image':
        # Decode the raw image bytes and normalize to RGB
        stream = io.BytesIO(data.read())
        img = Image.open(stream).convert('RGB')
        img = img.resize((WIDTH, HEIGHT))
        # Convert to a uint8 array in "channels_last" layout: (HEIGHT, WIDTH, 3)
        img_array = image.img_to_array(img)
        img_array = img_array.reshape((HEIGHT, WIDTH, 3)).astype(np.uint8)
        # Add a batch dimension; no additional preprocessing is applied
        x = np.expand_dims(img_array, axis=0)
        instance = x
        print(f' final image shape: {instance.shape}')
        print(f' final image size: {instance.nbytes}')
        del x, img
    else:
        # _return_error is expected to raise, so processing stops here for unsupported content types
        _return_error(415, 'Unsupported content type "{}"'.format(
            context.request_content_type or 'Unknown'))
    start_time = time.time()
    if USE_GRPC:
        # Invoke TensorFlow Serving over gRPC (an illustrative sketch of such a call appears above this handler)
        prediction = _predict_using_grpc(context, instance)
    else:  # use the TFS REST API
        # TFS expects a JSON body of the form {"instances": [...]}
        inst_json = json.dumps({'instances': instance.tolist()})
        print('rest call')
        response = requests.post(context.rest_uri, data=inst_json)
        if response.status_code != 200:
            raise Exception(response.content.decode('utf-8'))
        res = response.content
        request_size = sys.getsizeof(inst_json)
        response_size = sys.getsizeof(res)
        print(f'request payload size: {request_size}')
        print(f'response payload size: {response_size}')
        # The postprocessing below slows down inference further, so it is left to the client side
        # (see parse_detections_sketch after this function for one way a client could do it):
        # prediction_json = {'detection_boxes': json.loads(res)['predictions'][0]['detection_boxes']}
        # prediction = json.dumps(prediction_json)
        prediction = res
    end_time = time.time()
    latency = int((end_time - start_time) * 1000)
    print(f'=== TFS invoke took: {latency} ms')
    print('complete')
    print(f'accept header: {context.accept_header}')
    # Return the raw prediction together with the content type requested by the client
    response_content_type = context.accept_header
    return prediction, response_content_type
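
# --- Illustrative sketch (not part of the original file) --------------------
# handler() above expects the request body to be raw image bytes sent with
# Content-Type 'application/x-image'. A client could invoke the endpoint
# roughly as below; the endpoint name and image path are placeholders.
def invoke_endpoint_sketch(endpoint_name, image_path):
    import boto3
    runtime = boto3.client('sagemaker-runtime')
    with open(image_path, 'rb') as f:
        payload = f.read()
    response = runtime.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='application/x-image',  # matches the check in handler()
        Body=payload)
    return response['Body'].read()  # raw TFS response bytes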
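
# --- Illustrative sketch (not part of the original file) --------------------
# Postprocessing is deliberately left to the client side (see the comment above
# 'prediction = res'). One way a client could extract detections from the raw
# TFS response is sketched below. Only 'predictions' and 'detection_boxes'
# appear in the commented-out code above; 'detection_scores' and
# 'detection_classes' are assumptions based on typical TF Object Detection API
# output signatures.
def parse_detections_sketch(raw_response, score_threshold=0.5):
    import json
    pred = json.loads(raw_response)['predictions'][0]
    boxes = pred['detection_boxes']              # normalized [ymin, xmin, ymax, xmax]
    scores = pred.get('detection_scores', [])
    classes = pred.get('detection_classes', [])
    keep = [i for i, s in enumerate(scores) if s >= score_threshold]
    return [(classes[i], scores[i], boxes[i]) for i in keep]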