in optimum/runs_base.py [0:0]
def execute(self):
    """Run the latency benchmark for the wrapped model.

    Builds dummy inputs for every name in ``self.model_input_names``, performs
    ``self.warmup_runs`` untimed forward passes, then repeatedly times
    ``self.model.forward`` until the accumulated latency reaches
    ``self.benchmark_duration`` seconds.

    Returns:
        dict: benchmark statistics from ``self.to_dict()`` after timing, or a
        placeholder ``{"nb_forwards": 0, "throughput": -1, "latency_mean": -1}``
        when ``self.benchmark_duration`` is 0 (timing skipped).

    Raises:
        NotImplementedError: if some model input name has no dummy-input
            generator below.
    """
    checked_inputs = {"input_ids", "attention_mask", "token_type_ids", "pixel_values"}
    # Fail fast — validate BEFORE allocating any dummy tensors. Builtin any()
    # with a generator short-circuits; no need for numpy or a temporary list.
    if any(input_name not in checked_inputs for input_name in self.model_input_names):
        raise NotImplementedError(
            f"At least an input in {self.model_input_names} has no dummy generation for time benchmark."
        )

    inputs = {}
    if "input_ids" in self.model_input_names:
        # Arbitrary token ids in [0, 1000); values don't matter for timing.
        inputs["input_ids"] = torch.randint(high=1000, size=(self.batch_size, self.input_length))
    if "attention_mask" in self.model_input_names:
        # All-ones mask: every position attended, no padding.
        inputs["attention_mask"] = torch.ones(self.batch_size, self.input_length, dtype=torch.int64)
    if "token_type_ids" in self.model_input_names:
        inputs["token_type_ids"] = torch.ones(self.batch_size, self.input_length, dtype=torch.int64)
    if "pixel_values" in self.model_input_names:
        # TODO support grayscale?
        # Assumes a square 3-channel image of config.image_size — confirm for
        # vision models with a different input layout.
        inputs["pixel_values"] = torch.rand(
            self.batch_size, 3, self.model.config.image_size, self.model.config.image_size, dtype=torch.float32
        )

    # Warmup: untimed forward passes so lazy initialization / caches don't
    # pollute the measured latencies.
    for _ in trange(self.warmup_runs, desc="Warming up"):
        self.model.forward(**inputs)

    if self.benchmark_duration != 0:
        benchmark_duration_ns = self.benchmark_duration * SEC_TO_NS_SCALE
        print(f"Running time tracking in {self.benchmark_duration:.1f}s.")
        # Keep timing forward passes until the summed latencies fill the budget.
        while sum(self.latencies) < benchmark_duration_ns:
            # TODO: timing may include GPU/CPU <--> numpy/torch transfers;
            # needs a change in the forward implementation to exclude them.
            with self.track():
                self.model.forward(**inputs)

        self.finalize(benchmark_duration_ns)
        return self.to_dict()
    else:
        # Duration 0 means "skip timing": return sentinel stats so callers
        # always receive the same keys.
        benchmarks_stats = {
            "nb_forwards": 0,
            "throughput": -1,
            "latency_mean": -1,
        }
        return benchmarks_stats