in 2-dl-container/Container-Root/job/bert/direct_benchmark-gpu.py
import time

import torch

# latency_list, batch_size, and half_precision are module-level globals
# defined elsewhere in this script.

def task(model, encoded_inputs):
    """Run one batched forward pass on the GPU and record its latency."""
    global latency_list
    begin = time.time()
    with torch.cuda.amp.autocast(enabled=half_precision):
        # Replicate the single encoded example along the batch dimension.
        input_ids_tensor = encoded_inputs['input_ids']
        batch_input_ids_tensor = torch.cat([input_ids_tensor] * batch_size)
        attention_mask_tensor = encoded_inputs['attention_mask']
        batch_attention_mask_tensor = torch.cat([attention_mask_tensor] * batch_size)
        # Move the batch to the GPU and run inference.
        ts_input = batch_input_ids_tensor.cuda(), batch_attention_mask_tensor.cuda()
        _ = model(*ts_input)
    # CUDA kernels are launched asynchronously; synchronize so the measured
    # time covers the complete forward pass rather than just the launches.
    torch.cuda.synchronize()
    latency_time = time.time() - begin
    latency_list.append(latency_time)
    return
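
For context, a minimal sketch of how task() might be driven, assuming a Hugging Face BERT model and tokenizer; the global values, model name, and iteration counts below are illustrative and not taken from the script itself:

import numpy as np
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Globals consumed by task(); the values here are placeholders.
batch_size = 8          # copies of the encoded example per forward pass
half_precision = True   # run the forward pass under autocast (FP16)
latency_list = []       # task() appends one latency sample (seconds) per call

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased").eval().cuda()
encoded_inputs = tokenizer("This is a latency benchmark sentence.", return_tensors="pt")

with torch.no_grad():
    for _ in range(10):            # warm-up iterations
        task(model, encoded_inputs)
    latency_list.clear()           # discard warm-up samples
    for _ in range(100):           # measured iterations
        task(model, encoded_inputs)

print(f"p50 latency: {np.percentile(latency_list, 50) * 1000:.1f} ms")
print(f"p95 latency: {np.percentile(latency_list, 95) * 1000:.1f} ms")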