def task()

in 2-dl-container/Container-Root/job/bert/direct_benchmark-gpu.py [0:0]


import time

import torch

# latency_list, batch_size, and half_precision are defined at module
# scope in the benchmark script and shared across calls to task().

def task(model, encoded_inputs):
    global latency_list
    begin = time.time()

    # Run the forward pass under autocast so mixed precision is used
    # when half_precision is enabled.
    with torch.cuda.amp.autocast(enabled=half_precision):
        # Replicate the single encoded example batch_size times along dim 0.
        input_ids_tensor = encoded_inputs['input_ids']
        batch_input_ids_tensor = torch.cat([input_ids_tensor] * batch_size)
        attention_mask_tensor = encoded_inputs['attention_mask']
        batch_attention_mask_tensor = torch.cat([attention_mask_tensor] * batch_size)
        # Move the batched inputs to the GPU and run inference.
        ts_input = batch_input_ids_tensor.cuda(), batch_attention_mask_tensor.cuda()
        # neuron_input = encoded_inputs['input_ids'], encoded_inputs['attention_mask']
        _ = model(*ts_input)
        # The recorded latency covers batching, host-to-device transfer,
        # and the forward pass.
        latency_time = time.time() - begin
        latency_list.append(latency_time)
    return
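For context, a minimal driver sketch is shown below. It only illustrates how task() might be invoked; the model and tokenizer names, the batch_size and half_precision values, and the iteration count are assumptions and are not taken from direct_benchmark-gpu.py, which defines latency_list, batch_size, and half_precision at module scope.

# Hypothetical driver for task(); values marked "assumed" are illustrative
# and not taken from the original benchmark script.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Module-level globals expected by task().
latency_list = []
batch_size = 8           # assumed value
half_precision = True    # assumed value

# Assumed model; the script benchmarks a BERT model on GPU.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval().cuda()

# Encode a single example; task() replicates it batch_size times.
encoded_inputs = tokenizer("This is a sample sentence.", return_tensors="pt")

with torch.no_grad():
    for _ in range(100):          # assumed number of benchmark iterations
        task(model, encoded_inputs)

# Report median latency in milliseconds from the collected samples.
p50 = sorted(latency_list)[len(latency_list) // 2]
print(f"p50 latency: {p50 * 1000:.1f} ms")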