in dynalab/handler.py
import json
import logging
import time

logger = logging.getLogger(__name__)

# `_service`, `deserialize`, and `handle_mini_batch` come from the surrounding
# module; illustrative sketches of the two helpers follow the handler.


def handle(torchserve_data, context):
    if not _service.initialized:
        _service.initialize(context)
    if torchserve_data is None:
        # TorchServe may invoke the handler with no data (e.g., a warm-up call).
        return None

    start_time = time.time()
    all_samples = deserialize(torchserve_data)
    n = len(all_samples)
    elapsed = max(time.time() - start_time, 1e-6)  # guard against division by zero
    logger.info(f"Deserialized a batch of size {n} ({n / elapsed:.2f} samples / s)")

    # Adapt the batch size to your model; the GPU has 16 GB of RAM.
    batch_size = 128
    results = []
    samples = []
    for i, sample in enumerate(all_samples):
        samples.append(sample)
        # Flush once the mini-batch is full or this is the last sample.
        if len(samples) < batch_size and i + 1 < n:
            continue
        results.extend(handle_mini_batch(_service, samples))
        samples = []
    assert results, "Expected at least one result for a non-empty batch"

    start_time = time.time()
    response = "\n".join(json.dumps(r, ensure_ascii=False) for r in results)
    elapsed = max(time.time() - start_time, 1e-6)  # guard against division by zero
    logger.info(f"Serialized a batch of size {n} ({n / elapsed:.2f} samples / s)")
    # The whole batch is returned as one newline-delimited string in a
    # single-element list, which assumes the samples arrived in a single
    # TorchServe request.
    return [response]
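

# For context, minimal sketches of the two helpers used above. These are
# illustrative assumptions about their contracts, not the actual dynalab
# implementations: `deserialize` is assumed to parse newline-delimited JSON
# out of the TorchServe request bodies, and `handle_mini_batch` is assumed to
# run the wrapped model on one mini-batch and return one JSON-serializable
# dict per input sample.


def deserialize(torchserve_data):
    # TorchServe hands the handler a list of request dicts keyed by "body" or
    # "data"; each body may be bytes or str and is assumed here to contain one
    # or more newline-delimited JSON samples.
    samples = []
    for request in torchserve_data:
        body = request.get("body") or request.get("data")
        if isinstance(body, (bytes, bytearray)):
            body = body.decode("utf-8")
        samples.extend(
            json.loads(line) for line in body.splitlines() if line.strip()
        )
    return samples


def handle_mini_batch(service, samples):
    # Hypothetical: delegate to the service's wrapped model, assumed to expose
    # a `predict` method that maps a list of samples to a list of result dicts.
    return service.model.predict(samples)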