in 2-dl-container/Container-Root/job/resnet/direct_benchmark-gpu.py [0:0]
def task(model, cur_img_preprocess):
    """Run one batched forward pass on the GPU and record its latency.

    The input tensor is repeated ``batch_size`` times along dimension 0,
    copied to the current CUDA device and passed through ``model`` under
    autocast (toggled by the module-level ``half_precision`` flag). The
    elapsed time in seconds, covering batch construction, the host-to-device
    copy and the forward pass, is appended to the module-level
    ``latency_list``.

    Args:
        model: Callable invoked with the batched CUDA tensor; presumably an
            ``nn.Module`` that already lives on the GPU -- confirm against
            the caller.
        cur_img_preprocess: Tensor that is replicated to build the batch.

    Returns:
        None. The measurement is reported only through ``latency_list``.
    """
    # NOTE(review): autograd is not disabled in this function; confirm the
    # caller wraps it in torch.no_grad() if gradients are not wanted.

    # perf_counter() is monotonic, so the interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    with torch.cuda.amp.autocast(enabled=half_precision):
        batch_input_tensor = torch.cat([cur_img_preprocess] * batch_size)
        batch_input_tensor_gpu = batch_input_tensor.cuda()
        model(batch_input_tensor_gpu)
    # CUDA kernels are launched asynchronously: model(...) can return before
    # the GPU has finished. Wait for the device so the recorded latency
    # includes the actual compute time rather than just the launch overhead.
    torch.cuda.synchronize()
    latency_list.append(time.perf_counter() - start)