# 2-dl-container/Container-Root/job/bert/direct_benchmark-inf.py
def task(model, encoded_inputs):
    """Run one batched forward pass through *model* and record its latency.

    Args:
        model: Callable taking ``(input_ids, attention_mask)`` as positional
            tensor arguments (e.g. a traced/compiled BERT model).
        encoded_inputs: Mapping with ``'input_ids'`` and ``'attention_mask'``
            tensors (tokenizer output for a single example). Each tensor is
            tiled ``batch_size`` times along dim 0 to form the batch.

    Side effects:
        Appends the measured wall-clock latency in seconds (batch assembly
        plus forward pass, matching the original timed region) to the
        module-level ``latency_list``. Reads the module-level ``batch_size``.
    """
    # perf_counter is monotonic and high-resolution — the right clock for
    # latency benchmarking (time.time can jump with system clock changes).
    begin = time.perf_counter()
    # Tile the single encoded example into a batch along dim 0.
    batch_input_ids = torch.cat([encoded_inputs['input_ids']] * batch_size)
    batch_attention_mask = torch.cat([encoded_inputs['attention_mask']] * batch_size)
    # NOTE(review): the forward pass runs with autograd tracking enabled;
    # for pure inference benchmarking torch.no_grad() would be cheaper —
    # confirm intent before changing, as it alters the timed work.
    _ = model(batch_input_ids, batch_attention_mask)
    # .append mutates the module-level list in place; no `global` needed.
    latency_list.append(time.perf_counter() - begin)