in utils.py [0:0]
import time

import torch


def compute_throughput(model, batch_size=128, resolution=224):
    """Measure forward-pass throughput (images/s) of `model` on the GPU."""
    torch.cuda.empty_cache()
    warmup_iters = 3
    num_iters = 30
    model.eval()
    model.to("cuda")
    timing = []
    inputs = torch.randn(batch_size, 3, resolution, resolution, device="cuda")
    with torch.no_grad():
        # Warmup: let CUDA initialize kernels and caches before timing.
        for _ in range(warmup_iters):
            model(inputs)
        torch.cuda.synchronize()
        # Timed iterations: synchronize after each forward pass so the wall-clock
        # measurement covers the full GPU execution, not just the kernel launch.
        for _ in range(num_iters):
            start = time.time()
            model(inputs)
            torch.cuda.synchronize()
            timing.append(time.time() - start)
    timing = torch.as_tensor(timing, dtype=torch.float32)
    # Images processed per second, averaged over the timed iterations.
    return batch_size / timing.mean()
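

# A minimal usage sketch (not part of the original utility): any torch.nn.Module
# taking (N, 3, H, W) input works; resnet50 and torchvision here are only
# illustrative assumptions.
if __name__ == "__main__":
    import torchvision

    model = torchvision.models.resnet50()
    images_per_sec = compute_throughput(model, batch_size=64, resolution=224)
    print(f"throughput: {images_per_sec.item():.1f} images/s")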