in benchmark.py [0:0]
def run(self):
def _step():
t_step_start = self.time_fn()
with self.amp_autocast():
output = self.model(self.example_inputs)
t_step_end = self.time_fn(True)
return t_step_end - t_step_start
_logger.info(
f'Running inference benchmark on {self.model_name} for {self.num_bench_iter} steps w/ '
f'input size {self.input_size} and batch size {self.batch_size}.')
with torch.no_grad():
self._init_input()
for _ in range(self.num_warm_iter):
_step()
total_step = 0.
num_samples = 0
t_run_start = self.time_fn()
for i in range(self.num_bench_iter):
delta_fwd = _step()
total_step += delta_fwd
num_samples += self.batch_size
num_steps = i + 1
if num_steps % self.log_freq == 0:
_logger.info(
f"Infer [{num_steps}/{self.num_bench_iter}]."
f" {num_samples / total_step:0.2f} samples/sec."
f" {1000 * total_step / num_steps:0.3f} ms/step.")
t_run_end = self.time_fn(True)
t_run_elapsed = t_run_end - t_run_start
results = dict(
samples_per_sec=round(num_samples / t_run_elapsed, 2),
step_time=round(1000 * total_step / self.num_bench_iter, 3),
batch_size=self.batch_size,
img_size=self.input_size[-1],
param_count=round(self.param_count / 1e6, 2),
)
retries = 0 if self.compiled else 2 # skip profiling if model is scripted
while retries:
retries -= 1
try:
if has_deepspeed_profiling:
macs, _ = profile_deepspeed(self.model, self.input_size)
results['gmacs'] = round(macs / 1e9, 2)
elif has_fvcore_profiling:
macs, activations = profile_fvcore(self.model, self.input_size, force_cpu=not retries)
results['gmacs'] = round(macs / 1e9, 2)
results['macts'] = round(activations / 1e6, 2)
except RuntimeError as e:
pass
_logger.info(
f"Inference benchmark of {self.model_name} done. "
f"{results['samples_per_sec']:.2f} samples/sec, {results['step_time']:.2f} ms/step")
return results