in image-classification/code/inference.py [0:0]
def handler(data, context):
    """Serve one inference request against the TensorFlow Serving model.

    Args:
        data: Request body stream; for ``application/x-image`` requests,
            ``data.read()`` yields the raw image bytes.
        context: Serving context object providing ``request_content_type``,
            ``rest_uri`` (TFS REST endpoint), and ``accept_header``.
            The gRPC path presumably also reads connection info from it via
            ``_predict_using_grpc`` — helper not visible here.

    Returns:
        Tuple ``(prediction, response_content_type)``; ``prediction`` is the
        raw TFS response payload (bytes from REST, or whatever
        ``_predict_using_grpc`` returns).

    Raises:
        Exception: if the TFS REST call returns a non-200 status (the decoded
            response body is used as the message).
    """
    global num_inferences
    num_inferences += 1
    print(f'\n************ inference #: {num_inferences}')

    if context.request_content_type == 'application/x-image':
        # Decode the request body into an RGB image and resize to the
        # model's expected input dimensions.
        stream = io.BytesIO(data.read())
        img = Image.open(stream).convert('RGB')
        img = img.resize((WIDTH, HEIGHT))
        img_array = image.img_to_array(img)  # , data_format = "channels_first")
        # the image is now in an array of shape (224, 224, 3) or (3, 224, 224) based on data_format
        # need to expand it to add dim for num samples, e.g. (1, 224, 224, 3)
        x = np.expand_dims(img_array, axis=0)
        instance = preprocess_input(x)
        print(f'  final image shape: {instance.shape}')
        print(f'  final image size: {instance.nbytes}')
        # Drop intermediates promptly to keep peak memory down.
        del x, img
    else:
        # NOTE(review): this assumes _return_error raises (as in the AWS
        # sample this file is based on). If it merely returns, control falls
        # through and `instance` below is undefined (NameError) — confirm.
        _return_error(415, f'Unsupported content type "{context.request_content_type or "Unknown"}"')

    start_time = time.time()
    if USE_GRPC:
        prediction = _predict_using_grpc(context, instance)
    else:  # use TFS REST API
        inst_json = json.dumps({'instances': instance.tolist()})
        print('rest call')
        response = requests.post(context.rest_uri, data=inst_json)
        if response.status_code != 200:
            raise Exception(response.content.decode('utf-8'))
        res = response.content
        # sys.getsizeof is shallow, but adequate for these flat str/bytes
        # payloads — used only for logging.
        request_size = sys.getsizeof(inst_json)
        response_size = sys.getsizeof(res)
        print('request payload size')
        print(request_size)
        print('response payload size')
        print(response_size)
        prediction = res
    end_time = time.time()
    latency = int((end_time - start_time) * 1000)
    print(f'=== TFS invoke took: {latency} ms')

    response_content_type = context.accept_header
    return prediction, response_content_type