def compute_throughput()

in utils.py


import time

import torch


def compute_throughput(model, batch_size=128, resolution=224):
    """Measure forward-pass throughput of `model` in images per second on the GPU."""
    torch.cuda.empty_cache()
    warmup_iters = 3
    num_iters = 30
    model.eval()
    model.to("cuda")
    timing = []

    inputs = torch.randn(batch_size, 3, resolution, resolution, device="cuda")

    with torch.no_grad():
        # Warmup: let cuDNN select algorithms and fill caches before timing.
        for _ in range(warmup_iters):
            model(inputs)

        torch.cuda.synchronize()
        for _ in range(num_iters):
            start = time.time()
            model(inputs)
            # Block until the GPU finishes so the wall clock covers the full forward pass.
            torch.cuda.synchronize()
            timing.append(time.time() - start)

    timing = torch.as_tensor(timing, dtype=torch.float32)
    # Images processed per second, averaged over the timed iterations.
    return batch_size / timing.mean()
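

A minimal usage sketch, assuming a CUDA device is available and torchvision is installed; the ResNet-50 model and the batch size of 64 are illustrative stand-ins, not part of the original utility:

import torch
import torchvision

# Hypothetical example: benchmark a torchvision ResNet-50 as a stand-in model.
if torch.cuda.is_available():
    model = torchvision.models.resnet50()
    throughput = compute_throughput(model, batch_size=64, resolution=224)
    # The function returns a 0-dim tensor; convert to a float for printing.
    print(f"throughput: {float(throughput):.1f} images/s")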