in optimum_benchmark/scenarios/inference/scenario.py [0:0]
def run_text_generation_energy_tracking(self):
    """Record per-call energy and efficiency for prefill, generate and decode.

    The prefill and generate phases are each measured by calling the backend
    repeatedly inside ``self.energy_tracker.track(...)`` and dividing the
    tracked energy by the number of calls made. Decode energy is not measured
    directly; it is derived as ``generate - prefill``. Results are written to
    ``self.report.prefill``, ``self.report.generate`` and ``self.report.decode``.
    """
    self.logger.info("\t+ Running Text Generation energy tracking")

    def energy_per_call(task_name, run_once):
        """Call ``run_once`` repeatedly under the tracker; return energy per call."""
        calls = 0
        spent = 0
        began = time.perf_counter()
        with self.energy_tracker.track(task_name=task_name):
            # Keep going until BOTH the minimum duration has elapsed and the
            # minimum number of iterations has been reached.
            while spent < self.config.duration or calls < self.config.iterations:
                run_once()
                spent = time.perf_counter() - began
                calls += 1
        return self.energy_tracker.get_energy() / calls

    # Prefill phase: generation kwargs with the prefill overrides layered on top.
    prefill_kwargs = {**self.config.generate_kwargs, **TEXT_GENERATION_PREFILL_OVERRIDES}
    prefill_energy = energy_per_call(
        "prefill",
        lambda: self.backend.prefill(self.inputs, prefill_kwargs),
    )
    prefill_report = self.report.prefill
    prefill_report.energy = prefill_energy
    prefill_report.efficiency = Efficiency.from_energy(
        prefill_energy,
        self.atomic_prefill_volume,
        unit=PREFILL_EFFICIENCY_UNIT,
    )

    # Generate phase: full generation with the configured kwargs.
    generate_energy = energy_per_call(
        "generate",
        lambda: self.backend.generate(self.inputs, self.config.generate_kwargs),
    )
    generate_report = self.report.generate
    generate_report.energy = generate_energy
    generate_report.efficiency = Efficiency.from_energy(
        generate_energy,
        self.atomic_generate_volume,
        unit=GENERATE_EFFICIENCY_UNIT,
    )

    # Decode phase: whatever a full generate call costs beyond its prefill.
    decode_energy = generate_energy - prefill_energy
    decode_report = self.report.decode
    decode_report.energy = decode_energy
    decode_report.efficiency = Efficiency.from_energy(
        decode_energy,
        self.atomic_decode_volume,
        unit=DECODE_EFFICIENCY_UNIT,
    )