in src/sagemaker_huggingface_inference_toolkit/handler_service.py [0:0]
def handle(self, data, context):
    """Handles an inference request with input data and makes a prediction.

    Lazily (re)initializes the model if needed, decodes the request body,
    dispatches to the transform pipeline, records timing metrics, and sets
    the response content type.

    Args:
        data (obj): the request data; ``data[0]["body"]`` holds the raw
            request body (bytes).
        context (obj): metadata on the incoming request data, providing
            request properties, metrics, and response configuration.

    Returns:
        list[obj]: The return value from the Transformer.transform method,
            which is a serialized prediction result wrapped in a list if
            inference is successful. Otherwise returns an error message
            with the context set appropriately.

    Raises:
        PredictionException: wraps any failure during handling with an
            HTTP 400 status code.
    """
    try:
        if not self.initialized:
            # A prior initialization attempt that didn't complete suggests the
            # model is slow to load; warn before retrying.
            if self.attempted_init:
                # logging.Logger.warn is deprecated; warning() is the
                # supported spelling.
                logger.warning(
                    "Model is not initialized, will try to load model again.\n"
                    "Please consider increase wait time for model loading.\n"
                )
            self.initialize(context)

        input_data = data[0].get("body")

        request_property = context.request_processor[0].get_request_properties()
        content_type = utils.retrieve_content_type_header(request_property)
        # Header casing varies by client; default to JSON when the Accept
        # header is absent or is the wildcard type.
        accept = request_property.get("Accept") or request_property.get("accept")
        if not accept or accept == content_types.ANY:
            accept = content_types.JSON

        # Text payloads arrive as bytes and must be decoded before transform.
        if content_type in content_types.UTF8_TYPES:
            input_data = input_data.decode("utf-8")

        predict_start = time.time()
        response = self.transform_fn(
            *([self.model, input_data, content_type, accept] + self.transform_extra_arg)
        )
        predict_end = time.time()

        # Report transform latency in milliseconds.
        context.metrics.add_time("Transform Fn", round((predict_end - predict_start) * 1000, 2))
        context.set_response_content_type(0, accept)
        return [response]
    except Exception as e:
        # Chain the original exception so the full traceback is preserved
        # for server-side debugging while surfacing a 400 to the caller.
        raise PredictionException(str(e), 400) from e